1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
|
/* $OpenBSD: pmap.h,v 1.78 2021/06/18 06:17:28 guenther Exp $ */
/* $NetBSD: pmap.h,v 1.1 2003/04/26 18:39:46 fvdl Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 2001 Wasabi Systems, Inc.
* All rights reserved.
*
* Written by Frank van der Linden for Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed for the NetBSD Project by
* Wasabi Systems, Inc.
* 4. The name of Wasabi Systems, Inc. may not be used to endorse
* or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* pmap.h: see pmap.c for the history of this pmap module.
*/
#ifndef _MACHINE_PMAP_H_
#define _MACHINE_PMAP_H_
#ifndef _LOCORE
#ifdef _KERNEL
#include <machine/cpufunc.h>
#endif /* _KERNEL */
#include <sys/mutex.h>
#include <uvm/uvm_object.h>
#include <machine/pte.h>
#endif
/*
* The x86_64 pmap module closely resembles the i386 one. It uses
* the same recursive entry scheme. See the i386 pmap.h for a
* description. The alternate area trick for accessing non-current
* pmaps has been removed, though, because it performs badly on SMP
* systems.
* The most obvious difference to i386 is that 2 extra levels of page
* table need to be dealt with. The level 1 page table pages are at:
*
* l1: 0x00007f8000000000 - 0x00007fffffffffff (39 bits, needs PML4 entry)
*
* The other levels are kept as physical pages in 3 UVM objects and are
* temporarily mapped for virtual access when needed.
*
* The other obvious difference from i386 is that it has a direct map of all
* physical memory in the VA range:
*
* 0xfffffd8000000000 - 0xffffff7fffffffff
*
* The direct map is used in some cases to access PTEs of non-current pmaps.
*
* Note that address space is signed, so the layout for 48 bits is:
*
* +---------------------------------+ 0xffffffffffffffff
* | Kernel Image |
* +---------------------------------+ 0xffffff8000000000
* | Direct Map |
* +---------------------------------+ 0xfffffd8000000000
* ~ ~
* | |
* | Kernel Space |
* | |
* | |
* +---------------------------------+ 0xffff800000000000 = 0x0000800000000000
* | L1 table (PTE pages) |
* +---------------------------------+ 0x00007f8000000000
* ~ ~
* | |
* | User Space |
* | |
* | |
* +---------------------------------+ 0x0000000000000000
*
* In other words, there is a 'VA hole' at 0x0000800000000000 -
* 0xffff800000000000 which will trap, just as on, for example,
* sparcv9.
*
* The unused space can be used if needed, but it adds a little more
* complexity to the calculations.
*/
/*
* Mask to get rid of the sign-extended part of addresses.
*/
#define VA_SIGN_MASK 0xffff000000000000
#define VA_SIGN_NEG(va) ((va) | VA_SIGN_MASK)
/*
* XXXfvdl this one's not right.
*/
#define VA_SIGN_POS(va) ((va) & ~VA_SIGN_MASK)
#define L4_SLOT_PTE 255
#define L4_SLOT_KERN 256
#define L4_SLOT_KERNBASE 511
#define NUM_L4_SLOT_DIRECT 4
#define L4_SLOT_DIRECT (L4_SLOT_KERNBASE - NUM_L4_SLOT_DIRECT)
#define L4_SLOT_EARLY (L4_SLOT_DIRECT - 1)
#define PDIR_SLOT_KERN L4_SLOT_KERN
#define PDIR_SLOT_PTE L4_SLOT_PTE
#define PDIR_SLOT_DIRECT L4_SLOT_DIRECT
#define PDIR_SLOT_EARLY L4_SLOT_EARLY
/*
* the following defines give the virtual addresses of various MMU
* data structures:
* PTE_BASE: the base VA of the linear PTE mappings
* PDP_PDE: the VA of the PDE that points back to the PDP
*
*/
#define PTE_BASE ((pt_entry_t *) (L4_SLOT_PTE * NBPD_L4))
#define PMAP_DIRECT_BASE (VA_SIGN_NEG((L4_SLOT_DIRECT * NBPD_L4)))
#define PMAP_DIRECT_END (VA_SIGN_NEG(((L4_SLOT_DIRECT + \
NUM_L4_SLOT_DIRECT) * NBPD_L4)))
#define L1_BASE PTE_BASE
#define L2_BASE ((pd_entry_t *)((char *)L1_BASE + L4_SLOT_PTE * NBPD_L3))
#define L3_BASE ((pd_entry_t *)((char *)L2_BASE + L4_SLOT_PTE * NBPD_L2))
#define L4_BASE ((pd_entry_t *)((char *)L3_BASE + L4_SLOT_PTE * NBPD_L1))
#define PDP_PDE (L4_BASE + PDIR_SLOT_PTE)
#define PDP_BASE L4_BASE
#define NKL4_MAX_ENTRIES (unsigned long)1
#define NKL3_MAX_ENTRIES (unsigned long)(NKL4_MAX_ENTRIES * 512)
#define NKL2_MAX_ENTRIES (unsigned long)(NKL3_MAX_ENTRIES * 512)
#define NKL1_MAX_ENTRIES (unsigned long)(NKL2_MAX_ENTRIES * 512)
#define NKL4_KIMG_ENTRIES 1
#define NKL3_KIMG_ENTRIES 1
#define NKL2_KIMG_ENTRIES 64
/* number of pages of direct map entries set up by locore0.S */
#define NDML4_ENTRIES 1
#define NDML3_ENTRIES 1
#define NDML2_ENTRIES 4 /* 4GB */
/*
* Since kva space is below the kernel in its entirety, we start off
* with zero entries on each level.
*/
#define NKL4_START_ENTRIES 0
#define NKL3_START_ENTRIES 0
#define NKL2_START_ENTRIES 0
#define NKL1_START_ENTRIES 0 /* XXX */
#define NTOPLEVEL_PDES (PAGE_SIZE / (sizeof (pd_entry_t)))
#define NPDPG (PAGE_SIZE / sizeof (pd_entry_t))
/*
* pl*_pi: index in the ptp page for a pde mapping a VA.
* (pl*_i below is the index in the virtual array of all pdes per level)
*/
#define pl1_pi(VA) (((VA_SIGN_POS(VA)) & L1_MASK) >> L1_SHIFT)
#define pl2_pi(VA) (((VA_SIGN_POS(VA)) & L2_MASK) >> L2_SHIFT)
#define pl3_pi(VA) (((VA_SIGN_POS(VA)) & L3_MASK) >> L3_SHIFT)
#define pl4_pi(VA) (((VA_SIGN_POS(VA)) & L4_MASK) >> L4_SHIFT)
/*
* pl*_i: generate index into pde/pte arrays in virtual space
*/
#define pl1_i(VA) (((VA_SIGN_POS(VA)) & L1_FRAME) >> L1_SHIFT)
#define pl2_i(VA) (((VA_SIGN_POS(VA)) & L2_FRAME) >> L2_SHIFT)
#define pl3_i(VA) (((VA_SIGN_POS(VA)) & L3_FRAME) >> L3_SHIFT)
#define pl4_i(VA) (((VA_SIGN_POS(VA)) & L4_FRAME) >> L4_SHIFT)
#define pl_i(va, lvl) \
(((VA_SIGN_POS(va)) & ptp_masks[(lvl)-1]) >> ptp_shifts[(lvl)-1])
#define PTP_MASK_INITIALIZER { L1_FRAME, L2_FRAME, L3_FRAME, L4_FRAME }
#define PTP_SHIFT_INITIALIZER { L1_SHIFT, L2_SHIFT, L3_SHIFT, L4_SHIFT }
#define NKPTP_INITIALIZER { NKL1_START_ENTRIES, NKL2_START_ENTRIES, \
NKL3_START_ENTRIES, NKL4_START_ENTRIES }
#define NKPTPMAX_INITIALIZER { NKL1_MAX_ENTRIES, NKL2_MAX_ENTRIES, \
NKL3_MAX_ENTRIES, NKL4_MAX_ENTRIES }
#define NBPD_INITIALIZER { NBPD_L1, NBPD_L2, NBPD_L3, NBPD_L4 }
#define PDES_INITIALIZER { L2_BASE, L3_BASE, L4_BASE }
/*
* PTP macros:
* a PTP's index is the PD index of the PDE that points to it
* a PTP's offset is the byte-offset in the PTE space that this PTP is at
* a PTP's VA is the first VA mapped by that PTP
*/
#define ptp_va2o(va, lvl) (pl_i(va, (lvl)+1) * PAGE_SIZE)
#define PTP_LEVELS 4
/*
* PG_AVAIL usage: we make use of the ignored bits of the PTE
*/
#define PG_W PG_AVAIL1 /* "wired" mapping */
#define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */
/* PG_AVAIL3 not used */
/*
* PCID assignments.
* The shootdown code assumes KERN, PROC, and PROC_INTEL are both
* consecutive and in that order.
*/
#define PCID_KERN 0 /* for pmap_kernel() */
#define PCID_PROC 1 /* non-pmap_kernel(), U+K */
#define PCID_PROC_INTEL 2 /* non-pmap_kernel(), U-K (meltdown) */
#define PCID_TEMP 3 /* temp mapping of another non-pmap_kernel() */
extern int pmap_use_pcid; /* non-zero if PCID support is enabled */
/*
* Number of PTEs per cache line. 8 byte pte, 64-byte cache line
* Used to avoid false sharing of cache lines.
*/
#define NPTECL 8
#if defined(_KERNEL) && !defined(_LOCORE)
/*
* pmap data structures: see pmap.c for details of locking.
*/
struct pmap;
typedef struct pmap *pmap_t;
/*
* we maintain a list of all non-kernel pmaps
*/
LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */
/*
* the pmap structure
*
* note that the pm_obj contains the reference count,
* page list, and number of PTPs within the pmap.
*/
#define PMAP_TYPE_NORMAL 1
#define PMAP_TYPE_EPT 2
#define PMAP_TYPE_RVI 3
#define pmap_nested(pm) ((pm)->pm_type != PMAP_TYPE_NORMAL)
struct pmap {
struct mutex pm_mtx;
struct uvm_object pm_obj[PTP_LEVELS-1]; /* objects for lvl >= 1) */
LIST_ENTRY(pmap) pm_list; /* list (lck by pm_list lock) */
/*
* pm_pdir : VA of page table to be used when executing in
* privileged mode
* pm_pdirpa : PA of page table to be used when executing in
* privileged mode
* pm_pdir_intel : VA of special page table to be used when executing
* on an Intel CPU in usermode (no kernel mappings)
* pm_pdirpa_intel : PA of special page table to be used when executing
* on an Intel CPU in usermode (no kernel mappings)
*/
pd_entry_t *pm_pdir, *pm_pdir_intel;
paddr_t pm_pdirpa, pm_pdirpa_intel;
struct vm_page *pm_ptphint[PTP_LEVELS-1];
/* pointer to a PTP in our pmap */
struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */
int pm_type; /* Type of pmap this is (PMAP_TYPE_x) */
uint64_t eptp; /* cached EPTP (used by vmm) */
};
/*
* MD flags that we use for pmap_enter (in the pa):
*/
#define PMAP_PA_MASK ~((paddr_t)PAGE_MASK) /* to remove the flags */
#define PMAP_NOCACHE 0x1 /* set the non-cacheable bit. */
#define PMAP_WC 0x2 /* set page write combining. */
/*
* We keep mod/ref flags in struct vm_page->pg_flags.
*/
#define PG_PMAP_MOD PG_PMAP0
#define PG_PMAP_REF PG_PMAP1
#define PG_PMAP_WC PG_PMAP2
/*
* for each managed physical page we maintain a list of <PMAP,VA>'s
* which it is mapped at.
*/
struct pv_entry { /* locked by its list's pvh_lock */
struct pv_entry *pv_next; /* next entry */
struct pmap *pv_pmap; /* the pmap */
vaddr_t pv_va; /* the virtual address */
struct vm_page *pv_ptp; /* the vm_page of the PTP */
};
/*
* global kernel variables
*/
extern struct pmap kernel_pmap_store; /* kernel pmap */
extern long nkptp[];
extern const paddr_t ptp_masks[];
extern const int ptp_shifts[];
extern const long nbpd[], nkptpmax[];
/*
* macros
*/
#define pmap_kernel() (&kernel_pmap_store)
#define pmap_resident_count(pmap) ((pmap)->pm_stats.resident_count)
#define pmap_wired_count(pmap) ((pmap)->pm_stats.wired_count)
#define pmap_update(pmap) /* nothing (yet) */
#define pmap_clear_modify(pg) pmap_clear_attrs(pg, PG_M)
#define pmap_clear_reference(pg) pmap_clear_attrs(pg, PG_U)
#define pmap_copy(DP,SP,D,L,S)
#define pmap_is_modified(pg) pmap_test_attrs(pg, PG_M)
#define pmap_is_referenced(pg) pmap_test_attrs(pg, PG_U)
#define pmap_move(DP,SP,D,L,S)
#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */
#define pmap_proc_iflush(p,va,len) /* nothing */
#define pmap_unuse_final(p) /* nothing */
#define pmap_remove_holes(vm) do { /* nothing */ } while (0)
/*
* prototypes
*/
void map_tramps(void); /* machdep.c */
paddr_t pmap_bootstrap(paddr_t, paddr_t);
void pmap_randomize(void);
void pmap_randomize_level(pd_entry_t *, int);
int pmap_clear_attrs(struct vm_page *, unsigned long);
static void pmap_page_protect(struct vm_page *, vm_prot_t);
void pmap_page_remove (struct vm_page *);
static void pmap_protect(struct pmap *, vaddr_t,
vaddr_t, vm_prot_t);
void pmap_remove(struct pmap *, vaddr_t, vaddr_t);
int pmap_test_attrs(struct vm_page *, unsigned);
static void pmap_update_pg(vaddr_t);
void pmap_write_protect(struct pmap *, vaddr_t,
vaddr_t, vm_prot_t);
void pmap_fix_ept(struct pmap *, vaddr_t);
paddr_t pmap_prealloc_lowmem_ptps(paddr_t);
void pagezero(vaddr_t);
int pmap_convert(struct pmap *, int);
void pmap_enter_special(vaddr_t, paddr_t, vm_prot_t);
vaddr_t pmap_set_pml4_early(paddr_t pa);
void pmap_clear_pml4_early(void);
/*
* functions for flushing the cache for vaddrs and pages.
* these functions are not part of the MI pmap interface and thus
* should not be used as such.
*/
void pmap_flush_cache(vaddr_t, vsize_t);
#define pmap_flush_page(paddr) do { \
KDASSERT(PHYS_TO_VM_PAGE(paddr) != NULL); \
pmap_flush_cache(PMAP_DIRECT_MAP(paddr), PAGE_SIZE); \
} while (/* CONSTCOND */ 0)
#define PMAP_STEAL_MEMORY /* enable pmap_steal_memory() */
#define PMAP_GROWKERNEL /* turn on pmap_growkernel interface */
/*
* inline functions
*/
static inline void
pmap_remove_all(struct pmap *pmap)
{
/* Nothing. */
}
/*
* pmap_update_pg: flush one page from the TLB (or flush the whole thing
* if hardware doesn't support one-page flushing)
*/
inline static void
pmap_update_pg(vaddr_t va)
{
invlpg(va);
}
/*
* pmap_page_protect: change the protection of all recorded mappings
* of a managed page
*
* => this function is a frontend for pmap_page_remove/pmap_clear_attrs
* => we only have to worry about making the page more protected.
* unprotecting a page is done on-demand at fault time.
*/
inline static void
pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
{
if ((prot & PROT_WRITE) == 0) {
if (prot & (PROT_READ | PROT_EXEC)) {
(void) pmap_clear_attrs(pg, PG_RW);
} else {
pmap_page_remove(pg);
}
}
}
/*
* pmap_protect: change the protection of pages in a pmap
*
* => this function is a frontend for pmap_remove/pmap_write_protect
* => we only have to worry about making the page more protected.
* unprotecting a page is done on-demand at fault time.
*/
inline static void
pmap_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
{
if ((prot & PROT_WRITE) == 0) {
if (prot & (PROT_READ| PROT_EXEC)) {
pmap_write_protect(pmap, sva, eva, prot);
} else {
pmap_remove(pmap, sva, eva);
}
}
}
/*
* various address inlines
*
* vtopte: return a pointer to the PTE mapping a VA, works only for
* user and PT addresses
*
* kvtopte: return a pointer to the PTE mapping a kernel VA
*/
static inline pt_entry_t *
vtopte(vaddr_t va)
{
return (PTE_BASE + pl1_i(va));
}
static inline pt_entry_t *
kvtopte(vaddr_t va)
{
#ifdef LARGEPAGES
{
pd_entry_t *pde;
pde = L1_BASE + pl2_i(va);
if (*pde & PG_PS)
return ((pt_entry_t *)pde);
}
#endif
return (PTE_BASE + pl1_i(va));
}
#define PMAP_DIRECT_MAP(pa) ((vaddr_t)PMAP_DIRECT_BASE + (pa))
#define PMAP_DIRECT_UNMAP(va) ((paddr_t)(va) - PMAP_DIRECT_BASE)
#define pmap_map_direct(pg) PMAP_DIRECT_MAP(VM_PAGE_TO_PHYS(pg))
#define pmap_unmap_direct(va) PHYS_TO_VM_PAGE(PMAP_DIRECT_UNMAP(va))
#define __HAVE_PMAP_DIRECT
#endif /* _KERNEL && !_LOCORE */
#ifndef _LOCORE
struct pv_entry;
struct vm_page_md {
struct mutex pv_mtx;
struct pv_entry *pv_list;
};
#define VM_MDPAGE_INIT(pg) do { \
mtx_init(&(pg)->mdpage.pv_mtx, IPL_VM); \
(pg)->mdpage.pv_list = NULL; \
} while (0)
#endif /* !_LOCORE */
#endif /* _MACHINE_PMAP_H_ */
|