| author | Alexander Bluhm <bluhm@cvs.openbsd.org> | 2018-04-11 15:44:09 +0000 |
|---|---|---|
| committer | Alexander Bluhm <bluhm@cvs.openbsd.org> | 2018-04-11 15:44:09 +0000 |
| commit | 5c122cf30c2a07011498dd8355227e8be71eb0a3 (patch) | |
| tree | 02a7e86cf4d62a1f58198b4b79b59d902de5b6dd /sys | |
| parent | 67cfd0b658caa19398ace8387f73f4f6495ffd2f (diff) | |
More steps for i386 Meltdown fix:
- provide struct cpu_info_full
- prepare K-U sections
- reorganize interrupt, trap, syscall entry to use K-U trampoline
- prepare pmap for entering special mappings; the mappings are not set up yet
This code already triggers performance issues: we do more TLB flushes,
but we do not yet unmap the kernel. That unmapping is what will be
needed to prevent Meltdown.
from hshoexer@; input guenther@; OK mlarkin@ deraadt@
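
For context, here is a minimal C sketch of the per-CPU layout that the new cpu_full.h (added further down in this diff) introduces. The member names come from the diff; the stand-in sizes, the simplified trampoline-stack length, and the small main() are illustrative only. In the real header the trampoline stack is shortened so that the first cpu_info members up to ci_PAGEALIGN share the read/write page.

```c
/*
 * Simplified sketch of struct cpu_info_full from the diff below.
 * Stand-in member sizes and the check in main() are hypothetical;
 * the layout idea (one kRO page for TSS+GDT, one kRW page for the
 * trampoline stack, the rest of cpu_info left unmapped in the
 * user-kernel tables) follows the real cpu_full.h.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define PAGE_SIZE 4096

struct i386tss { char pad[104]; };	/* stand-in for the real TSS */

struct cpu_info {			/* heavily trimmed stand-in */
	uint32_t ci_kern_cr3;		/* U+K page table */
	uint32_t ci_scratch;		/* for U<-->K transition */
	char     ci_rest[512];		/* remaining members elided */
};

struct cpu_info_full {
	/* page mapped kRO in u-k: TSS followed by the GDT */
	union {
		struct i386tss u_tss;
		char u_align[PAGE_SIZE];
	} cif_TSS_RO;

	/* page mapped kRW in u-k: the U<-->K trampoline stack */
	uint32_t cif_tramp_stack[PAGE_SIZE / sizeof(uint32_t)];

	/* unmapped in u-k (in the real header the first members
	 * hang over into the kRW page above) */
	struct cpu_info cif_cpu;
} __attribute__((aligned(PAGE_SIZE)));

int
main(void)
{
	/* cif_cpu starts two pages in, matching the cpu_info_primary
	 * macro added to cpu.h by this commit */
	printf("cif_cpu offset: %zu\n",
	    offsetof(struct cpu_info_full, cif_cpu));
	return 0;
}
```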
Diffstat (limited to 'sys')
| -rw-r--r-- | sys/arch/i386/conf/ld.script | 22 |
| -rw-r--r-- | sys/arch/i386/i386/apicvec.s | 46 |
| -rw-r--r-- | sys/arch/i386/i386/bios.c | 8 |
| -rw-r--r-- | sys/arch/i386/i386/cpu.c | 57 |
| -rw-r--r-- | sys/arch/i386/i386/gdt.c | 95 |
| -rw-r--r-- | sys/arch/i386/i386/genassym.cf | 58 |
| -rw-r--r-- | sys/arch/i386/i386/kvm86.c | 10 |
| -rw-r--r-- | sys/arch/i386/i386/kvm86call.S | 13 |
| -rw-r--r-- | sys/arch/i386/i386/lapic.c | 14 |
| -rw-r--r-- | sys/arch/i386/i386/locore.s | 292 |
| -rw-r--r-- | sys/arch/i386/i386/machdep.c | 167 |
| -rw-r--r-- | sys/arch/i386/i386/pmap.c | 39 |
| -rw-r--r-- | sys/arch/i386/i386/pmapae.c | 13 |
| -rw-r--r-- | sys/arch/i386/i386/vector.s | 24 |
| -rw-r--r-- | sys/arch/i386/include/asm.h | 20 |
| -rw-r--r-- | sys/arch/i386/include/cpu.h | 17 |
| -rw-r--r-- | sys/arch/i386/include/cpu_full.h | 63 |
| -rw-r--r-- | sys/arch/i386/include/cpufunc.h | 5 |
| -rw-r--r-- | sys/arch/i386/include/frame.h | 43 |
| -rw-r--r-- | sys/arch/i386/include/gdt.h | 3 |
| -rw-r--r-- | sys/arch/i386/include/pcb.h | 4 |
| -rw-r--r-- | sys/arch/i386/include/pmap.h | 25 |
| -rw-r--r-- | sys/arch/i386/isa/icu.s | 18 |
| -rw-r--r-- | sys/arch/i386/isa/npx.c | 8 |
24 files changed, 779 insertions, 285 deletions
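
Before the raw diff: much of the pmap work below just threads a new pmap_enter_special() hook through i386's existing 86/PAE function-pointer switch, with both backends still stubs in this commit. A compressed, self-contained sketch of that dispatch pattern follows; the typedefs and stub bodies are illustrative, while the names are taken from the diff.

```c
/*
 * Sketch of the 86/PAE dispatch added for pmap_enter_special().
 * The typedefs stand in for the kernel's own; both implementations
 * are still empty stubs in this commit ("XXX nothing yet").
 */
#include <stdint.h>

typedef unsigned long vaddr_t;
typedef unsigned long paddr_t;
typedef int vm_prot_t;

void
pmap_enter_special_86(vaddr_t va, paddr_t pa, vm_prot_t prot, uint32_t flags)
{
	/* non-PAE backend: special u-k mappings are not set up yet */
}

void
pmap_enter_special_pae(vaddr_t va, paddr_t pa, vm_prot_t prot, uint32_t flags)
{
	/* PAE backend: likewise still empty */
}

/* defaults to the 86 backend; pmap_bootstrap_pae() repoints it */
void (*pmap_enter_special_p)(vaddr_t, paddr_t, vm_prot_t, uint32_t) =
    pmap_enter_special_86;

/* the inline wrapper callers such as cpu_enter_pages() use */
static inline void
pmap_enter_special(vaddr_t va, paddr_t pa, vm_prot_t prot, uint32_t flags)
{
	(*pmap_enter_special_p)(va, pa, prot, flags);
}

int
main(void)
{
	/* illustrative call only; the stub does nothing yet */
	pmap_enter_special(0xd0000000UL, 0x1000UL, 0, 0);
	return 0;
}
```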
diff --git a/sys/arch/i386/conf/ld.script b/sys/arch/i386/conf/ld.script index 32343bd5def..269318dde62 100644 --- a/sys/arch/i386/conf/ld.script +++ b/sys/arch/i386/conf/ld.script @@ -1,4 +1,4 @@ -/* $OpenBSD: ld.script,v 1.8 2017/10/24 20:06:54 guenther Exp $ */ +/* $OpenBSD: ld.script,v 1.9 2018/04/11 15:44:08 bluhm Exp $ */ /* * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org> @@ -56,6 +56,16 @@ SECTIONS locore0.o(.text) *(.text .text.*) } :text =0xcccccccc + + . = ALIGN(__ALIGN_SIZE); + __kernel_kutext_phys = . & 0xfffffff; + .kutext : AT (__kernel_kutext_phys) + { + __kutext_start = ABSOLUTE(.); + *(.kutext) + __kutext_end = ABSOLUTE(.); + } :text =0xcccccccc + PROVIDE (__etext = .); PROVIDE (etext = .); _etext = .; @@ -94,6 +104,16 @@ SECTIONS __data_load = LOADADDR(.data); *(.data .data.*) } :data =0xcccccccc + + . = ALIGN(__ALIGN_SIZE); + __kernel_kudata_phys = . & 0xfffffff; + .kudata : AT (__kernel_kudata_phys) + { + __kudata_start = ABSOLUTE(.); + *(.kudata) + __kudata_end = ABSOLUTE(.); + } + . = ALIGN(0x1000); PROVIDE (edata = .); _edata = .; diff --git a/sys/arch/i386/i386/apicvec.s b/sys/arch/i386/i386/apicvec.s index 6459b5af250..b86527f34c2 100644 --- a/sys/arch/i386/i386/apicvec.s +++ b/sys/arch/i386/i386/apicvec.s @@ -1,4 +1,4 @@ -/* $OpenBSD: apicvec.s,v 1.33 2017/07/06 06:17:05 deraadt Exp $ */ +/* $OpenBSD: apicvec.s,v 1.34 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: apicvec.s,v 1.1.2.2 2000/02/21 21:54:01 sommerfeld Exp $ */ /*- @@ -37,15 +37,12 @@ #include <machine/i82093reg.h> #include <machine/i82489reg.h> -#define XINTR(vec) Xintr##vec - .globl _C_LABEL(apic_stray) #ifdef MULTIPROCESSOR - .globl XINTR(ipi) -XINTR(ipi): +IDTVEC(intripi) subl $8,%esp /* space for tf_{err,trapno} */ - INTRENTRY + INTRENTRY(ipi) MAKE_FRAME pushl CPL movl _C_LABEL(lapic_ppr),%eax @@ -55,11 +52,13 @@ XINTR(ipi): call _C_LABEL(i386_ipi_handler) cli popl CPL +#ifdef DIAGNOSTIC + movl $0xf8,%esi +#endif INTRFASTEXIT - .globl XINTR(ipi_invltlb) .p2align 4,0xcc -XINTR(ipi_invltlb): +IDTVEC(intripi_invltlb) pushl %eax pushl %ds movl $GSEL(GDATA_SEL, SEL_KPL), %eax @@ -77,9 +76,8 @@ XINTR(ipi_invltlb): popl %eax iret - .globl XINTR(ipi_invlpg) .p2align 4,0xcc -XINTR(ipi_invlpg): +IDTVEC(intripi_invlpg) pushl %eax pushl %ds movl $GSEL(GDATA_SEL, SEL_KPL), %eax @@ -97,9 +95,8 @@ XINTR(ipi_invlpg): popl %eax iret - .globl XINTR(ipi_invlrange) .p2align 4,0xcc -XINTR(ipi_invlrange): +IDTVEC(intripi_invlrange) pushl %eax pushl %edx pushl %ds @@ -123,9 +120,8 @@ XINTR(ipi_invlrange): popl %eax iret - .globl XINTR(ipi_reloadcr3) .p2align 4,0xcc -XINTR(ipi_reloadcr3): +IDTVEC(intripi_reloadcr3) pushl %eax pushl %ds movl $GSEL(GDATA_SEL, SEL_KPL), %eax @@ -155,10 +151,9 @@ XINTR(ipi_reloadcr3): /* * Interrupt from the local APIC timer. 
*/ - .globl XINTR(ltimer) -XINTR(ltimer): +IDTVEC(intrltimer) subl $8,%esp /* space for tf_{err,trapno} */ - INTRENTRY + INTRENTRY(ltimer) MAKE_FRAME pushl CPL movl _C_LABEL(lapic_ppr),%eax @@ -173,10 +168,9 @@ XINTR(ltimer): decl CPUVAR(IDEPTH) jmp _C_LABEL(Xdoreti) - .globl XINTR(softclock), XINTR(softnet), XINTR(softtty) -XINTR(softclock): +KIDTVEC(intrsoftclock) subl $8,%esp /* space for tf_{err,trapno} */ - INTRENTRY + INTRENTRY(intrsoftclock) MAKE_FRAME pushl CPL movl $IPL_SOFTCLOCK,CPL @@ -190,9 +184,9 @@ XINTR(softclock): decl CPUVAR(IDEPTH) jmp _C_LABEL(Xdoreti) -XINTR(softnet): +KIDTVEC(intrsoftnet) subl $8,%esp /* space for tf_{err,trapno} */ - INTRENTRY + INTRENTRY(intrsoftnet) MAKE_FRAME pushl CPL movl $IPL_SOFTNET,CPL @@ -207,9 +201,9 @@ XINTR(softnet): jmp _C_LABEL(Xdoreti) #undef DONETISR -XINTR(softtty): +KIDTVEC(intrsofttty) subl $8,%esp /* space for tf_{err,trapno} */ - INTRENTRY + INTRENTRY(intrsofttty) MAKE_FRAME pushl CPL movl $IPL_SOFTTTY,CPL @@ -236,9 +230,9 @@ XINTR(softtty): */ #define APICINTR(name, num, early_ack, late_ack, mask, unmask, level_mask) \ -_C_LABEL(Xintr_##name##num): \ +IDTVEC(intr_##name##num) \ subl $8,%esp /* space for tf_{err,trapno} */ ;\ - INTRENTRY ;\ + INTRENTRY(intr_##name##num) ;\ MAKE_FRAME ;\ pushl CPL ;\ movl _C_LABEL(lapic_ppr),%eax ;\ diff --git a/sys/arch/i386/i386/bios.c b/sys/arch/i386/i386/bios.c index b2b20a62781..7e303d15aaa 100644 --- a/sys/arch/i386/i386/bios.c +++ b/sys/arch/i386/i386/bios.c @@ -1,4 +1,4 @@ -/* $OpenBSD: bios.c,v 1.116 2017/07/15 17:20:56 tedu Exp $ */ +/* $OpenBSD: bios.c,v 1.117 2018/04/11 15:44:08 bluhm Exp $ */ /* * Copyright (c) 1997-2001 Michael Shalayeff @@ -637,7 +637,6 @@ bios32_service(u_int32_t service, bios32_entry_t e, bios32_entry_info_t ei) u_long pa, endpa; vaddr_t va, sva; u_int32_t base, count, off, ent; - int slot; if (bios32_entry.offset == 0) return 0; @@ -664,8 +663,7 @@ bios32_service(u_int32_t service, bios32_entry_t e, bios32_entry_info_t ei) /* Store bios32 service kva for cleanup later */ bios_softc->bios32_service_va = sva; - slot = gdt_get_slot(); - setgdt(slot, (caddr_t)va, BIOS32_END, SDT_MEMERA, SEL_KPL, 1, 0); + setgdt(GBIOS32_SEL, (caddr_t)va, BIOS32_END, SDT_MEMERA, SEL_KPL, 1, 0); for (pa = trunc_page(BIOS32_START), va += trunc_page(BIOS32_START); @@ -682,7 +680,7 @@ bios32_service(u_int32_t service, bios32_entry_t e, bios32_entry_info_t ei) } } - e->segment = GSEL(slot, SEL_KPL); + e->segment = GSEL(GBIOS32_SEL, SEL_KPL); e->offset = (vaddr_t)ent; ei->bei_base = base; diff --git a/sys/arch/i386/i386/cpu.c b/sys/arch/i386/i386/cpu.c index d5f240e58ee..54e4395f6f3 100644 --- a/sys/arch/i386/i386/cpu.c +++ b/sys/arch/i386/i386/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.88 2018/03/31 13:45:03 bluhm Exp $ */ +/* $OpenBSD: cpu.c,v 1.89 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: cpu.c,v 1.1.2.7 2000/06/26 02:04:05 sommerfeld Exp $ */ /*- @@ -80,6 +80,7 @@ #include <uvm/uvm_extern.h> #include <machine/codepatch.h> +#include <machine/cpu_full.h> #include <machine/cpu.h> #include <machine/cpufunc.h> #include <machine/cpuvar.h> @@ -113,6 +114,15 @@ #include <i386/isa/nvram.h> #include <dev/isa/isareg.h> +/* #define CPU_DEBUG */ + +#ifdef CPU_DEBUG +#define DPRINTF(x...) do { printf(x); } while (0) +#else +#define DPRINTF(x...) +#endif /* CPU_DEBUG */ + + struct cpu_softc; int cpu_match(struct device *, void *, void *); @@ -139,7 +149,8 @@ struct cpu_functions mp_cpu_funcs = * CPU, on uniprocessors). The CPU info list is initialized to * point at it. 
*/ -struct cpu_info cpu_info_primary; +struct cpu_info_full cpu_info_full_primary = { .cif_cpu = { .ci_self = &cpu_info_primary } }; + struct cpu_info *cpu_info_list = &cpu_info_primary; #ifdef MULTIPROCESSOR @@ -233,8 +244,13 @@ cpu_attach(struct device *parent, struct device *self, void *aux) #endif if (caa->cpu_role == CPU_ROLE_AP) { - ci = malloc(sizeof(*ci), M_DEVBUF, M_WAITOK|M_ZERO); + struct cpu_info_full *cif; + + cif = km_alloc(sizeof *cif, &kv_any, &kp_zero, &kd_waitok); + ci = &cif->cif_cpu; #ifdef MULTIPROCESSOR + ci->ci_tss = &cif->cif_tss; + cpu_enter_pages(cif); if (cpu_info[cpunum] != NULL) panic("cpu at apic id %d already attached?", cpunum); cpu_info[cpunum] = ci; @@ -524,11 +540,11 @@ rdrand(void *v) int cpu_activate(struct device *self, int act) { - struct cpu_info *sc = (struct cpu_info *)self; + struct cpu_softc *sc = (struct cpu_softc *)self; switch (act) { case DVACT_RESUME: - if (sc->ci_cpuid == 0) + if (sc->sc_info->ci_cpuid == 0) rdrand(NULL); break; } @@ -536,6 +552,37 @@ cpu_activate(struct device *self, int act) return (0); } +void +cpu_enter_pages(struct cpu_info_full *cif) +{ + vaddr_t va; + paddr_t pa; + + /* The TSS + GDT need to be readable */ + va = (vaddr_t)&cif->cif_tss; + pmap_extract(pmap_kernel(), va, &pa); + pmap_enter_special(va, pa, PROT_READ, 0); + DPRINTF("%s: entered tss+gdt page at va 0x%08x pa 0x%08x\n", __func__, + (uint32_t)va, (uint32_t)pa); + + /* The trampoline stack page needs to be read/write */ + va = (vaddr_t)&cif->cif_tramp_stack; + pmap_extract(pmap_kernel(), va, &pa); + pmap_enter_special(va, pa, PROT_READ | PROT_WRITE, 0); + DPRINTF("%s: entered t.stack page at va 0x%08x pa 0x%08x\n", __func__, + (uint32_t)va, (uint32_t)pa); + + cif->cif_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); + cif->cif_tss.tss_esp0 = va + sizeof(cif->cif_tramp_stack) - 16; + DPRINTF("%s: cif_tss.tss_esp0 = 0x%08x\n", __func__, + (uint32_t)cif->cif_tss.tss_esp0); + cif->cif_cpu.ci_intr_esp = cif->cif_tss.tss_esp0 - + sizeof(struct trampframe); + + /* empty iomap */ + cif->cif_tss.tss_ioopt = sizeof(cif->cif_tss) << 16; +} + #ifdef MULTIPROCESSOR void cpu_boot_secondary_processors(void) diff --git a/sys/arch/i386/i386/gdt.c b/sys/arch/i386/i386/gdt.c index 70542e1686b..ba8eb01907f 100644 --- a/sys/arch/i386/i386/gdt.c +++ b/sys/arch/i386/i386/gdt.c @@ -1,4 +1,4 @@ -/* $OpenBSD: gdt.c,v 1.40 2018/03/31 13:45:03 bluhm Exp $ */ +/* $OpenBSD: gdt.c,v 1.41 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: gdt.c,v 1.28 2002/12/14 09:38:50 junyoung Exp $ */ /*- @@ -31,20 +31,13 @@ */ /* - * The GDT handling has two phases. During the early lifetime of the - * kernel there is a static gdt which will be stored in bootstrap_gdt. - * Later, when the virtual memory is initialized, this will be - * replaced with a maximum sized GDT. - * - * The bootstrap GDT area will hold the initial requirement of NGDT - * descriptors. The normal GDT will have a statically sized virtual memory - * area of size GDT_SIZE. + * The initial GDT is setup for the boot processor. The GDT holds + * NGDT descriptors. * * Every CPU in a system has its own copy of the GDT. The only real difference * between the two are currently that there is a cpu-specific segment holding * the struct cpu_info of the processor, for simplicity at getting cpu_info - * fields from assembly. The boot processor will actually refer to the global - * copy of the GDT as pointed to by the gdt variable. + * fields from assembly. 
*/ #include <sys/param.h> @@ -53,20 +46,13 @@ #include <uvm/uvm_extern.h> +#include <machine/cpu.h> #include <machine/gdt.h> #include <machine/pcb.h> - -union descriptor bootstrap_gdt[NGDT]; -union descriptor *gdt = bootstrap_gdt; - -int gdt_next; /* next available slot for sweeping */ -int gdt_free; /* next free slot; terminated with GNULL_SEL */ +#include <machine/tss.h> struct mutex gdt_lock_store = MUTEX_INITIALIZER(IPL_HIGH); -int gdt_get_slot(void); -void gdt_put_slot(int); - /* * Lock and unlock the GDT. */ @@ -78,7 +64,7 @@ void setgdt(int sel, void *base, size_t limit, int type, int dpl, int def32, int gran) { - struct segment_descriptor *sd = &gdt[sel].sd; + struct segment_descriptor *sd = &cpu_info_primary.ci_gdt[sel].sd; CPU_INFO_ITERATOR cii; struct cpu_info *ci; @@ -86,7 +72,7 @@ setgdt(int sel, void *base, size_t limit, int type, int dpl, int def32, setsegment(sd, base, limit, type, dpl, def32, gran); CPU_INFO_FOREACH(cii, ci) - if (ci->ci_gdt != NULL && ci->ci_gdt != gdt) + if (ci->ci_gdt != NULL && ci != &cpu_info_primary) ci->ci_gdt[sel].sd = *sd; } @@ -96,24 +82,8 @@ setgdt(int sel, void *base, size_t limit, int type, int dpl, int def32, void gdt_init(void) { - struct vm_page *pg; - vaddr_t va; struct cpu_info *ci = &cpu_info_primary; - gdt_next = GBIOS32_SEL; - gdt_free = GNULL_SEL; - - gdt = (union descriptor *)uvm_km_valloc(kernel_map, GDT_SIZE); - for (va = (vaddr_t)gdt; va < (vaddr_t)gdt + GDT_SIZE; - va += PAGE_SIZE) { - pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO); - if (pg == NULL) - panic("gdt_init: no pages"); - pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), - PROT_READ | PROT_WRITE); - } - bcopy(bootstrap_gdt, gdt, NGDT * sizeof(union descriptor)); - ci->ci_gdt = gdt; setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1, SDT_MEMRWA, SEL_KPL, 0, 0); @@ -127,29 +97,10 @@ gdt_init(void) void gdt_alloc_cpu(struct cpu_info *ci) { - struct vm_page *pg; - vaddr_t va; - - ci->ci_gdt = (union descriptor *)uvm_km_valloc(kernel_map, - GDT_SIZE + sizeof(*ci->ci_tss)); - ci->ci_tss = (void *)ci->ci_gdt + GDT_SIZE; - uvm_map_pageable(kernel_map, (vaddr_t)ci->ci_gdt, - (vaddr_t)ci->ci_gdt + GDT_SIZE + sizeof(*ci->ci_tss), - FALSE, FALSE); - for (va = (vaddr_t)ci->ci_gdt; - va < (vaddr_t)ci->ci_gdt + GDT_SIZE + sizeof(*ci->ci_tss); - va += PAGE_SIZE) { - pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO); - if (pg == NULL) - panic("gdt_alloc_cpu: no pages"); - pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), - PROT_READ | PROT_WRITE); - } - bzero(ci->ci_gdt, GDT_SIZE); - bcopy(gdt, ci->ci_gdt, GDT_SIZE); + ci->ci_gdt = (void *)(ci->ci_tss + 1); + bcopy(cpu_info_primary.ci_gdt, ci->ci_gdt, GDT_SIZE); setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci, sizeof(struct cpu_info)-1, SDT_MEMRWA, SEL_KPL, 0, 0); - bzero(ci->ci_tss, sizeof(*ci->ci_tss)); } #endif /* MULTIPROCESSOR */ @@ -172,29 +123,3 @@ gdt_init_cpu(struct cpu_info *ci) ltr(GSEL(GTSS_SEL, SEL_KPL)); lldt(0); } - -/* - * Allocate a GDT slot as follows: - * 1) If there are entries on the free list, use those. - * 2) If there are fewer than NGDT entries in use, there are free slots - * near the end that we can sweep through. 
- */ -int -gdt_get_slot(void) -{ - int slot; - - gdt_lock(); - - if (gdt_free != GNULL_SEL) { - slot = gdt_free; - gdt_free = gdt[slot].gd.gd_selector; - } else { - if (gdt_next >= NGDT) - panic("gdt_get_slot: out of GDT descriptors"); - slot = gdt_next++; - } - - gdt_unlock(); - return (slot); -} diff --git a/sys/arch/i386/i386/genassym.cf b/sys/arch/i386/i386/genassym.cf index 7f3385945d5..5e0806de7d5 100644 --- a/sys/arch/i386/i386/genassym.cf +++ b/sys/arch/i386/i386/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.45 2018/03/31 13:45:03 bluhm Exp $ +# $OpenBSD: genassym.cf,v 1.46 2018/04/11 15:44:08 bluhm Exp $ # # Copyright (c) 1982, 1990 The Regents of the University of California. # All rights reserved. @@ -108,6 +108,17 @@ member tf_eflags member tf_eip member tf_err member tf_eax +member tf_ecx +member tf_edx +member tf_ebx +member tf_ebp +member tf_esi +member tf_edi +member tf_ds +member tf_es +member tf_fs +member tf_gs +member tf_ss member tf_esp define FRAMESIZE sizeof(struct trapframe) @@ -115,6 +126,41 @@ define FRAMESIZE sizeof(struct trapframe) struct intrframe member if_ppl +# iret stack frame +struct iretframe +member irf_trapno +member irf_err +member irf_eip +member irf_cs +member irf_eflags +member irf_esp +member irf_ss +member irf_vm86_es +member irf_vm86_ds +member irf_vm86_fs +member irf_vm86_gs +define SIZEOF_IRETFRAME sizeof(struct iretframe) + +# trampoline stack frame +struct trampframe +member trf__deadbeef +member trf__kern_esp +member trf_fs +member trf_eax +member trf_ebp +member trf_trapno +member trf_err +member trf_eip +member trf_cs +member trf_eflags +member trf_esp +member trf_ss +member trf_vm86_es +member trf_vm86_ds +member trf_vm86_fs +member trf_vm86_gs +define SIZEOF_TRAMPFRAME sizeof(struct trampframe) + # signal handling struct sigframe SIGF_ member HANDLER sf_handler @@ -139,6 +185,7 @@ endif define IP_SRC offsetof(struct ip, ip_src) define IP_DST offsetof(struct ip, ip_dst) +define CPU_INFO_SCRATCH offsetof(struct cpu_info, ci_scratch) define CPU_INFO_SELF offsetof(struct cpu_info, ci_self) define CPU_INFO_APICID offsetof(struct cpu_info, ci_apicid) define CPU_INFO_CURPROC offsetof(struct cpu_info, ci_curproc) @@ -160,12 +207,17 @@ ifdef DIAGNOSTIC define CPU_INFO_MUTEX_LEVEL offsetof(struct cpu_info, ci_mutex_level) endif define CPU_INFO_CURPMAP offsetof(struct cpu_info, ci_curpmap) +define CPU_INFO_KERN_ESP offsetof(struct cpu_info, ci_kern_esp) +define CPU_INFO_INTR_ESP offsetof(struct cpu_info, ci_intr_esp) +define CPU_INFO_KERN_CR3 offsetof(struct cpu_info, ci_kern_cr3) +define CPU_INFO_USER_CR3 offsetof(struct cpu_info, ci_user_cr3) + +define SIZEOF_CPU_INFO sizeof(struct cpu_info) struct pmap member pm_pdirpa +member pm_pdirpa_intel struct i386tss member tss_esp0 -define SIZEOF_CPU_INFO sizeof(struct cpu_info) - diff --git a/sys/arch/i386/i386/kvm86.c b/sys/arch/i386/i386/kvm86.c index e540a0215f4..160252c7418 100644 --- a/sys/arch/i386/i386/kvm86.c +++ b/sys/arch/i386/i386/kvm86.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kvm86.c,v 1.16 2018/03/31 13:49:03 bluhm Exp $ */ +/* $OpenBSD: kvm86.c,v 1.17 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: kvm86.c,v 1.10 2005/12/26 19:23:59 perry Exp $ */ /* * Copyright (c) 2002 @@ -95,11 +95,7 @@ kvm86_init(void) vmd = (struct kvm86_data *)(buf + PAGE_SIZE); tss = &vmd->tss; - /* - * derive TSS from proc0 - * we want to access all IO ports, so we need a full-size - * permission bitmap - */ + /* derive TSS from primary cpu */ memcpy(tss, cpu_info_primary.ci_tss, sizeof(struct i386tss)); 
tss->tss_esp0 = (int)vmd; tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); @@ -107,7 +103,7 @@ kvm86_init(void) vmd->iomap[i] = 0; tss->tss_ioopt = ((caddr_t)vmd->iomap - (caddr_t)&tss) << 16; - /* setup TSS descriptor (including our iomap) */ + /* setup TSS descriptor */ setsegment(&vmd->sd, tss, sizeof(struct i386tss) + sizeof(vmd->iomap) - 1, SDT_SYS386TSS, SEL_KPL, 0, 0); diff --git a/sys/arch/i386/i386/kvm86call.S b/sys/arch/i386/i386/kvm86call.S index f1e0d5324bb..74812ff7c48 100644 --- a/sys/arch/i386/i386/kvm86call.S +++ b/sys/arch/i386/i386/kvm86call.S @@ -1,4 +1,4 @@ -/* $OpenBSD: kvm86call.S,v 1.12 2018/03/31 13:49:03 bluhm Exp $ */ +/* $OpenBSD: kvm86call.S,v 1.13 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: kvm86call.S,v 1.7 2006/04/11 17:14:07 drochner Exp $ */ /*- @@ -91,7 +91,7 @@ ENTRY(kvm86_call) jne 1b movl %ecx,%edx #else - leal _C_LABEL(cpu_info_primary),%ecx + movl CPUVAR(SELF),%ecx #endif movl CPU_INFO_CURPCB(%ecx),%eax @@ -100,7 +100,7 @@ ENTRY(kvm86_call) #ifdef MULTIPROCESSOR movl CPU_INFO_GDT(%edx),%eax #else - movl _C_LABEL(gdt),%eax + movl CPU_INFO_GDT(%ecx),%eax #endif movl $GSEL(GTSS_SEL, SEL_KPL),%edi andl $~0x0200,4(%eax,%edi,1) /* reset "task busy" */ @@ -150,7 +150,6 @@ ENTRY(kvm86_call) addl $8,%esp iret - /* void kvm86_ret(struct trapframe *, int) */ ENTRY(kvm86_ret) pushl %ebp @@ -185,8 +184,8 @@ ENTRY(kvm86_ret) jne 1b movl CPU_INFO_GDT(%ecx),%eax #else - leal _C_LABEL(cpu_info_primary),%ecx - movl _C_LABEL(gdt),%eax + movl CPUVAR(SELF),%ecx + movl CPU_INFO_GDT(%ecx),%eax #endif movl $GSEL(GTSS_SEL, SEL_KPL),%edi movl SCRTSS0, %edx @@ -208,7 +207,7 @@ ENTRY(kvm86_ret) cmpl %eax,%edx jne 1b #else - leal _C_LABEL(cpu_info_primary),%ecx + movl CPUVAR(SELF),%ecx #endif popl %eax /* restore curpcb */ movl %eax,CPU_INFO_CURPCB(%ecx) diff --git a/sys/arch/i386/i386/lapic.c b/sys/arch/i386/i386/lapic.c index f28e3cf797b..631707b42fd 100644 --- a/sys/arch/i386/i386/lapic.c +++ b/sys/arch/i386/i386/lapic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lapic.c,v 1.44 2018/03/31 13:45:03 bluhm Exp $ */ +/* $OpenBSD: lapic.c,v 1.45 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: lapic.c,v 1.1.2.8 2000/02/23 06:10:50 sommerfeld Exp $ */ /*- @@ -55,6 +55,14 @@ #include <dev/ic/i8253reg.h> +/* #define LAPIC_DEBUG */ + +#ifdef LAPIC_DEBUG +#define DPRINTF(x...) do { printf(x); } while(0) +#else +#define DPRINTF(x...) 
+#endif /* LAPIC_DEBUG */ + struct evcount clk_count; #ifdef MULTIPROCESSOR struct evcount ipi_count; @@ -87,6 +95,10 @@ lapic_map(paddr_t lapic_base) pmap_pte_set(va, lapic_base, PG_RW | PG_V | PG_N); invlpg(va); + pmap_enter_special(va, lapic_base, PROT_READ | PROT_WRITE, PG_N); + DPRINTF("%s: entered lapic page va 0x%08lx pa 0x%08lx\n", __func__, + va, lapic_base); + #ifdef MULTIPROCESSOR cpu_init_first(); #endif diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s index d0171bed9f6..c01bd6253f2 100644 --- a/sys/arch/i386/i386/locore.s +++ b/sys/arch/i386/i386/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.184 2018/03/31 13:45:03 bluhm Exp $ */ +/* $OpenBSD: locore.s,v 1.185 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */ /*- @@ -100,28 +100,114 @@ #define CLEAR_ASTPENDING(cpreg) \ movl $0,P_MD_ASTPENDING(cpreg) +#ifdef VM86 +#define SAVE_VM86 \ + testl $PSL_VM,TRF_EFLAGS(%ebp) ; \ + jz 98f ; \ + movl TRF_VM86_ES(%ebp),%eax ; \ + movl %eax,IRF_VM86_ES(%esp) ; \ + movl TRF_VM86_DS(%ebp),%eax ; \ + movl %eax,IRF_VM86_DS(%esp) ; \ + movl TRF_VM86_FS(%ebp),%eax ; \ + movl %eax,IRF_VM86_FS(%esp) ; \ + movl TRF_VM86_GS(%ebp),%eax ; \ + movl %eax,IRF_VM86_GS(%esp) ; \ +98: ; + +#define RESTORE_VM86 \ + testl $PSL_VM,TRF_EFLAGS(%ebp) ; \ + jz 99f ; \ + movl TRF_VM86_ES(%esp),%eax ; \ + movl %eax,TRF_VM86_ES(%ebp) ; \ + movl TRF_VM86_DS(%esp),%eax ; \ + movl %eax,TRF_VM86_DS(%ebp) ; \ + movl TRF_VM86_FS(%esp),%eax ; \ + movl %eax,TRF_VM86_FS(%ebp) ; \ + movl TRF_VM86_GS(%esp),%eax ; \ + movl %eax,TRF_VM86_GS(%ebp) ; \ +99: ; + +#else + +#define SAVE_VM86 ; +#define RESTORE_VM86 ; + +#endif /* VM86 */ + /* * These are used on interrupt or trap entry or exit. */ -#define INTRENTRY \ - cld ; \ - SMAP_CLAC ; \ - pushl %eax ; \ - pushl %ecx ; \ - pushl %edx ; \ - pushl %ebx ; \ - pushl %ebp ; \ - pushl %esi ; \ - pushl %edi ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %gs ; \ +#define INTRENTRY_LABEL(label) X##label##_untramp +#define INTRENTRY(label) \ + /* we have an iretframe */ ; \ + testb $SEL_RPL,IRF_CS(%esp) ; \ + /* from kernel, stay on kernel stack, use iretframe */ ; \ + je INTRENTRY_LABEL(label) ; \ + /* entering from user space, map kernel */ ; \ + pushl %ebp ; \ + pushl %eax ; \ + pushl %fs ; \ + movl $GSEL(GCPU_SEL, SEL_KPL),%eax ; \ + movw %ax,%fs ; \ + movl CPUVAR(KERN_CR3),%eax ; \ + testl %eax,%eax ; \ + jz 97f ; \ + movl %eax,%cr3 ; \ + jmp 97f ; \ + .text ; \ + .global INTRENTRY_LABEL(label) ; \ +INTRENTRY_LABEL(label): /* from kernel */ ; \ + jmp 98f ; \ + /* from user space, build trampframe */ ; \ +97: movl CPUVAR(KERN_ESP),%eax ; \ + pushl %eax ; \ + pushl $0xdeadbeef ; \ + movl %esp,%ebp ; \ + movl %eax,%esp ; \ + subl $SIZEOF_IRETFRAME,%esp ; \ + /* we have a trampframe, copy to iretframe on kernel stack */ ; \ + movl TRF_SS(%ebp),%eax ; \ + movl %eax,IRF_SS(%esp) ; \ + movl TRF_ESP(%ebp),%eax ; \ + movl %eax,IRF_ESP(%esp) ; \ + movl TRF_EFLAGS(%ebp),%eax ; \ + movl %eax,IRF_EFLAGS(%esp) ; \ + movl TRF_CS(%ebp),%eax ; \ + movl %eax,IRF_CS(%esp) ; \ + movl TRF_EIP(%ebp),%eax ; \ + movl %eax,IRF_EIP(%esp) ; \ + movl TRF_ERR(%ebp),%eax ; \ + movl %eax,IRF_ERR(%esp) ; \ + movl TRF_TRAPNO(%ebp),%eax ; \ + movl %eax,IRF_TRAPNO(%esp) ; \ + SAVE_VM86 ; \ + movl TRF_FS(%ebp),%eax ; \ + movw %ax,%fs ; \ + movl TRF_EAX(%ebp),%eax ; \ + movl TRF_EBP(%ebp),%ebp ; \ +98: INTR_SAVE_ALL + +#define INTR_SAVE_ALL \ + cld ; \ + SMAP_CLAC ; \ + /* we have an iretframe, build trapframe */ ; \ + subl $44,%esp ; \ + movl 
%eax,TF_EAX(%esp) ; \ + movl %ecx,TF_ECX(%esp) ; \ + movl %edx,TF_EDX(%esp) ; \ + movl %ebx,TF_EBX(%esp) ; \ + movl %ebp,TF_EBP(%esp) ; \ + movl %esi,TF_ESI(%esp) ; \ + movl %edi,TF_EDI(%esp) ; \ + movw %ds,TF_DS(%esp) ; \ + movw %es,TF_ES(%esp) ; \ + movw %gs,TF_GS(%esp) ; \ movl $GSEL(GDATA_SEL, SEL_KPL),%eax ; \ - movw %ax,%ds ; \ - movw %ax,%es ; \ + movw %ax,%ds ; \ + movw %ax,%es ; \ xorl %eax,%eax ; /* $GSEL(GNULL_SEL, SEL_KPL) == 0 */ \ - movw %ax,%gs ; \ - pushl %fs ; \ + movw %ax,%gs ; \ + movw %fs,TF_FS(%esp) ; \ movl $GSEL(GCPU_SEL, SEL_KPL),%eax ; \ movw %ax,%fs @@ -139,9 +225,7 @@ popl %eax #define INTRFASTEXIT \ - INTR_RESTORE_ALL ;\ - addl $8,%esp ; \ - iret + jmp intr_fast_exit #define INTR_FAKE_TRAP 0xbadabada @@ -245,8 +329,10 @@ NENTRY(proc_trampoline) pushl %ebx call *%esi addl $4,%esp - INTRFASTEXIT - /* NOTREACHED */ +#ifdef DIAGNOSTIC + movl $0xfe,%esi +#endif + jmp .Lsyscall_check_asts /* This must come before any use of the CODEPATCH macros */ .section .codepatch,"a" @@ -775,14 +861,14 @@ switch_exited: movl PCB_ESP(%ebx),%esp movl PCB_EBP(%ebx),%ebp - /* Set this process' esp0 in the TSS. */ - movl CPUVAR(TSS),%edx - movl PCB_KSTACK(%ebx),%eax - movl %eax,TSS_ESP0(%edx) - /* Record new pcb. */ movl %ebx, CPUVAR(CURPCB) + /* record the bits needed for future U-->K transition */ + movl PCB_KSTACK(%ebx),%eax + subl $FRAMESIZE,%eax + movl %eax,CPUVAR(KERN_ESP) + /* * Activate the address space. The pcb copy of %cr3 will * be refreshed from the pmap, and because we're @@ -872,13 +958,10 @@ ENTRY(savectx) * and only enable them again on the final `iret' or before calling the AST * handler. */ -#define IDTVEC(name) ALIGN_TEXT; .globl X##name; X##name: #define TRAP(a) pushl $(a) ; jmp _C_LABEL(alltraps) #define ZTRAP(a) pushl $0 ; TRAP(a) - - .text IDTVEC(div) ZTRAP(T_DIVIDE) IDTVEC(dbg) @@ -904,16 +987,15 @@ IDTVEC(dna) #if NNPX > 0 pushl $0 # dummy error code pushl $T_DNA - INTRENTRY -#ifdef MULTIPROCESSOR + INTRENTRY(dna) pushl CPUVAR(SELF) -#else - pushl $_C_LABEL(cpu_info_primary) -#endif call *_C_LABEL(npxdna_func) addl $4,%esp testl %eax,%eax jz calltrap +#ifdef DIAGNOSTIC + movl $0xfd,%esi +#endif INTRFASTEXIT #else ZTRAP(T_DNA) @@ -932,7 +1014,7 @@ IDTVEC(prot) TRAP(T_PROTFLT) IDTVEC(f00f_redirect) pushl $T_PAGEFLT - INTRENTRY + INTRENTRY(f00f_redirect) testb $PGEX_U,TF_ERR(%esp) jnz calltrap movl %cr2,%eax @@ -967,12 +1049,15 @@ IDTVEC(fpu) * this is difficult for nested interrupts. */ subl $8,%esp /* space for tf_{err,trapno} */ - INTRENTRY + INTRENTRY(fpu) pushl CPL # if_ppl in intrframe pushl %esp # push address of intrframe incl _C_LABEL(uvmexp)+V_TRAP call _C_LABEL(npxintr) addl $8,%esp # pop address and if_ppl +#ifdef DIAGNOSTIC + movl $0xfc,%esi +#endif INTRFASTEXIT #else ZTRAP(T_ARITHTRAP) @@ -987,7 +1072,7 @@ IDTVEC(align) * necessary, and resume as if we were handling a general protection fault. * This will cause the process to get a SIGBUS. */ -NENTRY(resume_iret) +KUENTRY(resume_iret) ZTRAP(T_PROTFLT) NENTRY(resume_pop_ds) pushl %es @@ -1010,8 +1095,8 @@ NENTRY(resume_pop_fs) * All traps go through here. Call the generic trap handler, and * check for ASTs afterwards. */ -NENTRY(alltraps) - INTRENTRY +KUENTRY(alltraps) + INTRENTRY(alltraps) sti calltrap: #ifdef DIAGNOSTIC @@ -1032,13 +1117,15 @@ calltrap: * to emulate the patched instruction. 
*/ movl $INTR_FAKE_TRAP, TF_ERR(%esp) - jz 2f + jz .Lalltraps_check_asts .Lreal_trap: #endif /* !defined(GPROF) && defined(DDBPROF) */ pushl %esp call _C_LABEL(trap) addl $4,%esp -2: /* Check for ASTs on exit to user mode. */ + +.Lalltraps_check_asts: + /* Check for ASTs on exit to user mode. */ cli CHECK_ASTPENDING(%ecx) je 1f @@ -1053,7 +1140,7 @@ calltrap: pushl %esp call _C_LABEL(ast) addl $4,%esp - jmp 2b + jmp .Lalltraps_check_asts 1: #if !defined(GPROF) && defined(DDBPROF) /* @@ -1071,6 +1158,9 @@ calltrap: #else cmpl CPL,%ebx jne 3f +#ifdef DIAGNOSTIC + movl $0xfb,%esi +#endif INTRFASTEXIT 3: sti pushl $spl_lowered @@ -1080,7 +1170,7 @@ calltrap: int $3 #endif /* DDB */ movl %ebx,CPL - jmp 2b + jmp .Lalltraps_check_asts .section .rodata spl_lowered: @@ -1115,16 +1205,49 @@ spl_lowered: popl %eax iret #endif /* !defined(GPROF) && defined(DDBPROF) */ + + .text +#ifdef DIAGNOSTIC +.Lintr_exit_not_blocked: + movl warn_once,%eax + testl %eax,%eax + jnz 1f + incl %eax + movl %eax,warn_once + pushl %esi /* marker indicating where we came from */ + pushl %edx /* EFLAGS are in %edx */ + pushl $.Lnot_blocked + call _C_LABEL(printf) + addl $12,%esp +#ifdef DDB + int $3 +#endif /* DDB */ +1: cli + jmp intr_fast_exit + + .data + .global warn_once +warn_once: + .long 0 + .section .rodata +.Lnot_blocked: + .asciz "WARNING: INTERRUPTS NOT BLOCKED ON INTERRUPT RETURN 0x%x 0x%x\n" + .text +#endif + /* * Trap gate entry for syscall */ IDTVEC(syscall) subl $8,%esp /* space for tf_{err,trapno} */ - INTRENTRY + INTRENTRY(syscall) + sti pushl %esp call _C_LABEL(syscall) addl $4,%esp -2: /* Check for ASTs on exit to user mode. */ + +.Lsyscall_check_asts: + /* Check for ASTs on exit to user mode. */ cli CHECK_ASTPENDING(%ecx) je 1f @@ -1134,8 +1257,79 @@ IDTVEC(syscall) pushl %esp call _C_LABEL(ast) addl $4,%esp - jmp 2b -1: INTRFASTEXIT + jmp .Lsyscall_check_asts +1: +#ifdef DIAGNOSTIC + movl $0xff,%esi +#endif + jmp intr_fast_exit + +NENTRY(intr_fast_exit) +#ifdef DIAGNOSTIC + pushfl + popl %edx + testl $PSL_I,%edx + jnz .Lintr_exit_not_blocked +#endif + /* we have a full trapframe */ + INTR_RESTORE_ALL + /* now we have an iretframe */ + testb $SEL_RPL,IRF_CS(%esp) + /* recursing into kernel: stay on kernel stack using iretframe */ + je doreti_iret + + /* leaving kernel: build trampframe on cpu stack */ + pushl %ebp + pushl %eax + pushl %fs + movl $GSEL(GCPU_SEL, SEL_KPL),%eax + movw %ax,%fs + movl CPUVAR(INTR_ESP),%eax + pushl %eax + pushl $0xcafecafe + /* now we have an trampframe, copy frame to cpu stack */ + movl %eax,%ebp + movl TRF_EIP(%esp),%eax + movl %eax,TRF_EIP(%ebp) + movl TRF_CS(%esp),%eax + movl %eax,TRF_CS(%ebp) + movl TRF_EFLAGS(%esp),%eax + movl %eax,TRF_EFLAGS(%ebp) + movl TRF_ESP(%esp),%eax + movl %eax,TRF_ESP(%ebp) + movl TRF_SS(%esp),%eax + movl %eax,TRF_SS(%ebp) + movl TRF__DEADBEEF(%esp),%eax + movl %eax,TRF__DEADBEEF(%ebp) + movl TRF__KERN_ESP(%esp),%eax + movl %eax,TRF__KERN_ESP(%ebp) + RESTORE_VM86 + movl TRF_FS(%esp),%eax + movl %eax,TRF_FS(%ebp) + movl TRF_EAX(%esp),%eax + movl %eax,TRF_EAX(%ebp) + movl TRF_EBP(%esp),%eax + movl %eax,TRF_EBP(%ebp) + /* switch to cpu stack, where we copied the trampframe */ + movl %ebp,%esp + movl CPUVAR(USER_CR3),%eax + testl %eax,%eax + jz 1f + jmp iret_tramp + +KUENTRY(iret_tramp) + movl %eax,%cr3 + /* we have a trampframe; restore registers and adjust to iretframe */ +1: popl %eax + popl %eax + popl %fs + popl %eax + popl %ebp + .globl _C_LABEL(doreti_iret) +_C_LABEL(doreti_iret): + /* we have an iretframe */ + addl $IRF_EIP,%esp + iret 
#include <i386/i386/vector.s> #include <i386/isa/icu.s> diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index a508c541690..14f8a48bd8e 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.614 2018/03/31 13:55:05 bluhm Exp $ */ +/* $OpenBSD: machdep.c,v 1.615 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */ /*- @@ -100,6 +100,7 @@ #include <machine/bus.h> #include <machine/cpu.h> +#include <machine/cpu_full.h> #include <machine/cpufunc.h> #include <machine/cpuvar.h> #include <machine/gdt.h> @@ -169,6 +170,14 @@ extern struct proc *npxproc; #include <dev/ic/pckbcvar.h> #endif +/* #define MACHDEP_DEBUG */ + +#ifdef MACHDEP_DEBUG +#define DPRINTF(x...) do { printf(x); } while (0) +#else +#define DPRINTF(x...) +#endif /* MACHDEP_DEBUG */ + #include "vmm.h" void replacesmap(void); @@ -314,6 +323,7 @@ void p3_get_bus_clock(struct cpu_info *); void p4_update_cpuspeed(void); void p3_update_cpuspeed(void); int pentium_cpuspeed(int *); +void enter_shared_special_pages(void); #if NVMM > 0 void cpu_check_vmm_cap(struct cpu_info *); #endif /* NVMM > 0 */ @@ -415,6 +425,47 @@ cpu_startup(void) #endif } ioport_malloc_safe = 1; + + /* enter the IDT and trampoline code in the u-k maps */ + enter_shared_special_pages(); + + /* initialize CPU0's TSS and GDT and put them in the u-k maps */ + cpu_enter_pages(&cpu_info_full_primary); +} + +void +enter_shared_special_pages(void) +{ + extern char __kutext_start[], __kutext_end[], __kernel_kutext_phys[]; + extern char __kudata_start[], __kudata_end[], __kernel_kudata_phys[]; + vaddr_t va; + paddr_t pa; + + /* idt */ + pmap_extract(pmap_kernel(), (vaddr_t)idt, &pa); + pmap_enter_special((vaddr_t)idt, pa, PROT_READ, 0); + + /* .kutext section */ + va = (vaddr_t)__kutext_start; + pa = (paddr_t)__kernel_kutext_phys; + while (va < (vaddr_t)__kutext_end) { + pmap_enter_special(va, pa, PROT_READ | PROT_EXEC, 0); + DPRINTF("%s: entered kutext page va 0x%08lx pa 0x%08lx\n", + __func__, (unsigned long)va, (unsigned long)pa); + va += PAGE_SIZE; + pa += PAGE_SIZE; + } + + /* .kudata section */ + va = (vaddr_t)__kudata_start; + pa = (paddr_t)__kernel_kudata_phys; + while (va < (vaddr_t)__kudata_end) { + pmap_enter_special(va, pa, PROT_READ | PROT_WRITE, 0); + DPRINTF("%s: entered kudata page va 0x%08lx pa 0x%08lx\n", + __func__, (unsigned long)va, (unsigned long)pa); + va += PAGE_SIZE; + pa += PAGE_SIZE; + } } /* @@ -429,11 +480,6 @@ i386_proc0_tss_init(void) pcb->pcb_cr0 = rcr0(); pcb->pcb_kstack = (int)proc0.p_addr + USPACE - 16; proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_kstack - 1; - - /* empty iomap */ - cpu_info_primary.ci_tss->tss_ioopt = sizeof(struct i386tss) << 16; - cpu_info_primary.ci_tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); - cpu_info_primary.ci_tss->tss_esp0 = pcb->pcb_kstack; } #ifdef MULTIPROCESSOR @@ -442,10 +488,6 @@ i386_init_pcb_tss(struct cpu_info *ci) { struct pcb *pcb = ci->ci_idle_pcb; - ci->ci_tss->tss_ioopt = sizeof(*ci->ci_tss) << 16; - ci->ci_tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); - ci->ci_tss->tss_esp0 = pcb->pcb_kstack; - pcb->pcb_cr0 = rcr0(); } #endif /* MULTIPROCESSOR */ @@ -2924,7 +2966,12 @@ setregs(struct proc *p, struct exec_package *pack, u_long stack, * Initialize segments and descriptor tables */ -struct gate_descriptor idt_region[NIDT]; +/* IDT is now a full page, so we can map it in u-k */ +union { + struct gate_descriptor idt[NIDT]; + char align[PAGE_SIZE]; +} _idt_region 
__aligned(PAGE_SIZE); +#define idt_region _idt_region.idt struct gate_descriptor *idt = idt_region; extern struct user *proc0paddr; @@ -2960,7 +3007,6 @@ unsetgate(struct gate_descriptor *gd) void setregion(struct region_descriptor *rd, void *base, size_t limit) { - rd->rd_limit = (int)limit; rd->rd_base = (int)base; } @@ -2998,6 +3044,7 @@ fix_f00f(void) { struct region_descriptor region; vaddr_t va; + paddr_t pa; void *p; /* Allocate two new pages */ @@ -3009,14 +3056,21 @@ fix_f00f(void) idt = p; /* Fix up paging redirect */ - setgate(&idt[ 14], &IDTVEC(f00f_redirect), 0, SDT_SYS386TGT, SEL_KPL, + setgate(&idt[ 14], &IDTVEC(f00f_redirect), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); /* Map first page RO */ pmap_pte_setbits(va, 0, PG_RW); + /* add k-u read-only mappings XXX old IDT stays in place */ + /* XXX hshoexer: are f00f affected CPUs affected by meltdown? */ + pmap_extract(pmap_kernel(), va, &pa); + pmap_enter_special(va, pa, PROT_READ, 0); + pmap_extract(pmap_kernel(), va + PAGE_SIZE, &pa); + pmap_enter_special(va + PAGE_SIZE, pa, PROT_READ, 0); + /* Reload idtr */ - setregion(®ion, idt, sizeof(idt_region) - 1); + setregion(®ion, idt, NIDT * sizeof(idt[0]) - 1); lidt(®ion); /* Tell the rest of the world */ @@ -3033,8 +3087,6 @@ cpu_init_idt(void) } #endif /* MULTIPROCESSOR */ -struct i386tss proc0_tss; - void init386(paddr_t first_avail) { @@ -3045,55 +3097,60 @@ init386(paddr_t first_avail) proc0.p_addr = proc0paddr; cpu_info_primary.ci_self = &cpu_info_primary; cpu_info_primary.ci_curpcb = &proc0.p_addr->u_pcb; - cpu_info_primary.ci_tss = &proc0_tss; + cpu_info_primary.ci_tss = &cpu_info_full_primary.cif_tss; + cpu_info_primary.ci_gdt = (void *)(cpu_info_primary.ci_tss + 1); /* make bootstrap gdt gates and memory segments */ - setsegment(&gdt[GCODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 1, 1); - setsegment(&gdt[GICODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 1, 1); - setsegment(&gdt[GDATA_SEL].sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 1); - setsegment(&gdt[GUCODE_SEL].sd, 0, atop(I386_MAX_EXE_ADDR) - 1, - SDT_MEMERA, SEL_UPL, 1, 1); - setsegment(&gdt[GUDATA_SEL].sd, 0, atop(VM_MAXUSER_ADDRESS) - 1, - SDT_MEMRWA, SEL_UPL, 1, 1); - setsegment(&gdt[GCPU_SEL].sd, &cpu_info_primary, + setsegment(&cpu_info_primary.ci_gdt[GCODE_SEL].sd, 0, 0xfffff, + SDT_MEMERA, SEL_KPL, 1, 1); + setsegment(&cpu_info_primary.ci_gdt[GICODE_SEL].sd, 0, 0xfffff, + SDT_MEMERA, SEL_KPL, 1, 1); + setsegment(&cpu_info_primary.ci_gdt[GDATA_SEL].sd, 0, 0xfffff, + SDT_MEMRWA, SEL_KPL, 1, 1); + setsegment(&cpu_info_primary.ci_gdt[GUCODE_SEL].sd, 0, + atop(I386_MAX_EXE_ADDR) - 1, SDT_MEMERA, SEL_UPL, 1, 1); + setsegment(&cpu_info_primary.ci_gdt[GUDATA_SEL].sd, 0, + atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1); + setsegment(&cpu_info_primary.ci_gdt[GCPU_SEL].sd, &cpu_info_primary, sizeof(struct cpu_info)-1, SDT_MEMRWA, SEL_KPL, 0, 0); - setsegment(&gdt[GUFS_SEL].sd, 0, atop(VM_MAXUSER_ADDRESS) - 1, - SDT_MEMRWA, SEL_UPL, 1, 1); - setsegment(&gdt[GUGS_SEL].sd, 0, atop(VM_MAXUSER_ADDRESS) - 1, - SDT_MEMRWA, SEL_UPL, 1, 1); - setsegment(&gdt[GTSS_SEL].sd, &proc0_tss, sizeof(proc0_tss)-1, + setsegment(&cpu_info_primary.ci_gdt[GUFS_SEL].sd, 0, + atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1); + setsegment(&cpu_info_primary.ci_gdt[GUGS_SEL].sd, 0, + atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1); + setsegment(&cpu_info_primary.ci_gdt[GTSS_SEL].sd, + cpu_info_primary.ci_tss, sizeof(cpu_info_primary.ci_tss)-1, SDT_SYS386TSS, SEL_KPL, 0, 0); /* exceptions */ - setgate(&idt[ 0], &IDTVEC(div), 0, 
SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 1], &IDTVEC(dbg), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 2], &IDTVEC(nmi), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 3], &IDTVEC(bpt), 0, SDT_SYS386TGT, SEL_UPL, GCODE_SEL); - setgate(&idt[ 4], &IDTVEC(ofl), 0, SDT_SYS386TGT, SEL_UPL, GCODE_SEL); - setgate(&idt[ 5], &IDTVEC(bnd), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 6], &IDTVEC(ill), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 7], &IDTVEC(dna), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 8], &IDTVEC(dble), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 9], &IDTVEC(fpusegm), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 10], &IDTVEC(tss), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 11], &IDTVEC(missing), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 12], &IDTVEC(stk), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 13], &IDTVEC(prot), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 14], &IDTVEC(page), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 15], &IDTVEC(rsvd), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 16], &IDTVEC(fpu), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 17], &IDTVEC(align), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 18], &IDTVEC(mchk), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); - setgate(&idt[ 19], &IDTVEC(simd), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 0], &IDTVEC(div), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 1], &IDTVEC(dbg), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 2], &IDTVEC(nmi), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 3], &IDTVEC(bpt), 0, SDT_SYS386IGT, SEL_UPL, GCODE_SEL); + setgate(&idt[ 4], &IDTVEC(ofl), 0, SDT_SYS386IGT, SEL_UPL, GCODE_SEL); + setgate(&idt[ 5], &IDTVEC(bnd), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 6], &IDTVEC(ill), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 7], &IDTVEC(dna), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 8], &IDTVEC(dble), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 9], &IDTVEC(fpusegm), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 10], &IDTVEC(tss), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 11], &IDTVEC(missing), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 12], &IDTVEC(stk), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 13], &IDTVEC(prot), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 14], &IDTVEC(page), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 15], &IDTVEC(rsvd), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 16], &IDTVEC(fpu), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 17], &IDTVEC(align), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 18], &IDTVEC(mchk), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); + setgate(&idt[ 19], &IDTVEC(simd), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); for (i = 20; i < NRSVIDT; i++) - setgate(&idt[i], &IDTVEC(rsvd), 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL); + setgate(&idt[i], &IDTVEC(rsvd), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL); for (i = NRSVIDT; i < NIDT; i++) unsetgate(&idt[i]); - setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386TGT, SEL_UPL, GCODE_SEL); + setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386IGT, SEL_UPL, GCODE_SEL); - setregion(®ion, gdt, GDT_SIZE - 1); + setregion(®ion, cpu_info_primary.ci_gdt, GDT_SIZE - 1); lgdt(®ion); - setregion(®ion, idt, sizeof(idt_region) - 1); + setregion(®ion, idt, NIDT * sizeof(idt[0]) - 1); lidt(®ion); /* @@ -3384,7 
+3441,7 @@ cpu_reset(void) * IDT to point to nothing. */ bzero((caddr_t)idt, sizeof(idt_region)); - setregion(®ion, idt, sizeof(idt_region) - 1); + setregion(®ion, idt, NIDT * sizeof(idt[0]) - 1); lidt(®ion); __asm volatile("divl %0,%1" : : "q" (0), "a" (0)); diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index e48d5048e15..ba1dcf0c1e9 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.199 2018/03/31 13:45:03 bluhm Exp $ */ +/* $OpenBSD: pmap.c,v 1.200 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */ /* @@ -930,6 +930,8 @@ pmap_bootstrap(vaddr_t kva_start) bzero(&kpm->pm_list, sizeof(kpm->pm_list)); /* pm_list not used */ kpm->pm_pdir = (vaddr_t)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); kpm->pm_pdirpa = proc0.p_addr->u_pcb.pcb_cr3; + kpm->pm_pdir_intel = 0; + kpm->pm_pdirpa_intel = 0; kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = atop(kva_start - VM_MIN_KERNEL_ADDRESS); kpm->pm_type = PMAP_TYPE_NORMAL; @@ -1316,6 +1318,10 @@ pmap_pinit_pd_86(struct pmap *pmap) &pmap->pm_pdirpa); pmap->pm_pdirsize = NBPG; + /* XXX hshoexer */ + pmap->pm_pdir_intel = pmap->pm_pdir; + pmap->pm_pdirpa_intel = pmap->pm_pdirpa; + /* init PDP */ /* zero init area */ bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); @@ -1377,6 +1383,11 @@ pmap_destroy(struct pmap *pmap) &kp_zero); #endif /* NVMM > 0 */ + if (pmap->pm_pdir_intel) { + uvm_km_free(kernel_map, pmap->pm_pdir_intel, pmap->pm_pdirsize); + pmap->pm_pdir_intel = 0; + } + pool_put(&pmap_pmap_pool, pmap); } @@ -1421,11 +1432,21 @@ pmap_switch(struct proc *o, struct proc *p) } else if (o != NULL && pmap == pmap_kernel()) { nlazy_cr3++; } else { - curcpu()->ci_curpmap = pmap; + self->ci_curpmap = pmap; lcr3(pmap->pm_pdirpa); } /* + * Meltdown: iff we're doing separate U+K and U-K page tables, + * then record them in cpu_info for easy access in syscall and + * interrupt trampolines. + */ + if (pmap->pm_pdirpa_intel) { + self->ci_kern_cr3 = pmap->pm_pdirpa; + self->ci_user_cr3 = pmap->pm_pdirpa_intel; + } + + /* * Set the correct descriptor value (i.e. with the * correct code segment X limit) in the GDT. */ @@ -2400,6 +2421,12 @@ out: return error; } +void +pmap_enter_special_86(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int32_t flags) +{ + /* XXX hshoexer nothing yet */ +} + /* * pmap_growkernel: increase usage of KVM space * @@ -2493,10 +2520,10 @@ out: * release the lock if we get an interrupt in a bad moment. 
*/ -volatile int tlb_shoot_wait; +volatile int tlb_shoot_wait __attribute__((section(".kudata"))); -volatile vaddr_t tlb_shoot_addr1; -volatile vaddr_t tlb_shoot_addr2; +volatile vaddr_t tlb_shoot_addr1 __attribute__((section(".kudata"))); +volatile vaddr_t tlb_shoot_addr2 __attribute__((section(".kudata"))); void pmap_tlb_shootpage(struct pmap *pm, vaddr_t va) @@ -2692,6 +2719,8 @@ boolean_t (*pmap_clear_attrs_p)(struct vm_page *, int) = pmap_clear_attrs_86; int (*pmap_enter_p)(pmap_t, vaddr_t, paddr_t, vm_prot_t, int) = pmap_enter_86; +void (*pmap_enter_special_p)(vaddr_t, paddr_t, vm_prot_t, + u_int32_t) = pmap_enter_special_86; boolean_t (*pmap_extract_p)(pmap_t, vaddr_t, paddr_t *) = pmap_extract_86; vaddr_t (*pmap_growkernel_p)(vaddr_t) = pmap_growkernel_86; diff --git a/sys/arch/i386/i386/pmapae.c b/sys/arch/i386/i386/pmapae.c index e4ffa837c9d..8387ce88f8b 100644 --- a/sys/arch/i386/i386/pmapae.c +++ b/sys/arch/i386/i386/pmapae.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmapae.c,v 1.52 2016/10/21 06:20:58 mlarkin Exp $ */ +/* $OpenBSD: pmapae.c,v 1.53 2018/04/11 15:44:08 bluhm Exp $ */ /* * Copyright (c) 2006-2008 Michael Shalayeff @@ -685,6 +685,7 @@ pmap_bootstrap_pae(void) pmap_pte_paddr_p = pmap_pte_paddr_pae; pmap_clear_attrs_p = pmap_clear_attrs_pae; pmap_enter_p = pmap_enter_pae; + pmap_enter_special_p = pmap_enter_special_pae; pmap_extract_p = pmap_extract_pae; pmap_growkernel_p = pmap_growkernel_pae; pmap_page_remove_p = pmap_page_remove_pae; @@ -848,6 +849,10 @@ pmap_pinit_pd_pae(struct pmap *pmap) pmap->pm_pdidx[3] |= PG_V; pmap->pm_pdirsize = 4 * NBPG; + /* XXX hshoexer */ + pmap->pm_pdir_intel = pmap->pm_pdir; + pmap->pm_pdirpa_intel = pmap->pm_pdirpa; + /* init PDP */ /* zero init area */ bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); @@ -1752,6 +1757,12 @@ out: return error; } +void +pmap_enter_special_pae(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int32_t flags) +{ + /* XXX hshoexer nothing yet */ +} + /* * pmap_growkernel: increase usage of KVM space * diff --git a/sys/arch/i386/i386/vector.s b/sys/arch/i386/i386/vector.s index 6b2609a38d7..9a7ffa6c5a4 100644 --- a/sys/arch/i386/i386/vector.s +++ b/sys/arch/i386/i386/vector.s @@ -1,4 +1,4 @@ -/* $OpenBSD: vector.s,v 1.21 2017/05/30 12:41:55 mlarkin Exp $ */ +/* $OpenBSD: vector.s,v 1.22 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: vector.s,v 1.32 1996/01/07 21:29:47 mycroft Exp $ */ /* @@ -72,24 +72,24 @@ * On exit, we jump to Xdoreti(), to process soft interrupts and ASTs. 
*/ #define INTRSTUB(name, num, early_ack, late_ack, mask, unmask, level_mask) \ -IDTVEC(resume_##name##num) ;\ +KIDTVEC(resume_##name##num) ;\ push %ebx ;\ cli ;\ jmp 1f ;\ -IDTVEC(recurse_##name##num) ;\ +KIDTVEC(recurse_##name##num) ;\ pushfl ;\ pushl %cs ;\ pushl %esi ;\ subl $8,%esp /* space for tf_{err,trapno} */ ;\ movl %ebx,%esi ;\ - INTRENTRY ;\ + INTRENTRY(recurse_##name##num) ;\ MAKE_FRAME ;\ push %esi ;\ cli ;\ jmp 1f ;\ -_C_LABEL(Xintr_##name##num): ;\ +IDTVEC(intr_##name##num) ;\ subl $8,%esp /* space for tf_{err,trapno} */ ;\ - INTRENTRY ;\ + INTRENTRY(intr_##name##num) ;\ MAKE_FRAME ;\ mask(num) /* mask it in hardware */ ;\ early_ack(num) /* and allow other intrs */ ;\ @@ -130,15 +130,23 @@ _C_LABEL(Xintr_##name##num): ;\ 6: unmask(num) /* unmask it in hardware */ ;\ late_ack(num) ;\ jmp _C_LABEL(Xdoreti) /* lower spl and do ASTs */ ;\ -IDTVEC(stray_##name##num) ;\ +KIDTVEC(stray_##name##num) ;\ pushl $num ;\ call _C_LABEL(isa_strayintr) ;\ addl $4,%esp ;\ jmp 6b ;\ -IDTVEC(hold_##name##num) ;\ +KIDTVEC(hold_##name##num) ;\ orb $IRQ_BIT(num),CPUVAR(IPENDING) + IRQ_BYTE(num) ;\ + CLIDEBUG ;\ INTRFASTEXIT +#if defined(DIAGNOSTIC) +#define CLIDEBUG \ + movl $0xfa,%esi +#else +#define CLIDEBUG +#endif + #if defined(DEBUG) #define STRAY_INITIALIZE \ xorl %esi,%esi diff --git a/sys/arch/i386/include/asm.h b/sys/arch/i386/include/asm.h index 4ed74fa1dfb..bed16be391f 100644 --- a/sys/arch/i386/include/asm.h +++ b/sys/arch/i386/include/asm.h @@ -1,4 +1,4 @@ -/* $OpenBSD: asm.h,v 1.15 2017/06/29 17:36:16 deraadt Exp $ */ +/* $OpenBSD: asm.h,v 1.16 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: asm.h,v 1.7 1994/10/27 04:15:56 cgd Exp $ */ /*- @@ -96,6 +96,20 @@ #define _ENTRY(x) \ .text; _ALIGN_TEXT; .globl x; .type x,@function; x: +#ifdef _KERNEL +#define KUTEXT .section .kutext, "ax" + +#define IDTVEC(name) \ + KUTEXT; ALIGN_TEXT; \ + .globl X##name; X##name: +#define KIDTVEC(name) \ + .text; ALIGN_TEXT; \ + .globl X##name; X##name: +#define KUENTRY(x) \ + KUTEXT; _ALIGN_TEXT; .globl x; .type x,@function; x: + +#endif /* _KERNEL */ + #if defined(PROF) || defined(GPROF) # define _PROF_PROLOGUE \ pushl %ebp; movl %esp,%ebp; call PIC_PLT(mcount); popl %ebp @@ -113,11 +127,7 @@ #ifdef _KERNEL -#ifdef MULTIPROCESSOR #define CPUVAR(var) %fs:__CONCAT(CPU_INFO_,var) -#else -#define CPUVAR(var) _C_LABEL(cpu_info_primary)+__CONCAT(CPU_INFO_,var) -#endif #endif /* _KERNEL */ diff --git a/sys/arch/i386/include/cpu.h b/sys/arch/i386/include/cpu.h index 9982b3599dc..351c9be6a7c 100644 --- a/sys/arch/i386/include/cpu.h +++ b/sys/arch/i386/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.161 2018/03/31 13:45:03 bluhm Exp $ */ +/* $OpenBSD: cpu.h,v 1.162 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: cpu.h,v 1.35 1996/05/05 19:29:26 christos Exp $ */ /*- @@ -103,7 +103,11 @@ union vmm_cpu_cap { #ifdef _KERNEL /* XXX stuff to move to cpuvar.h later */ struct cpu_info { - struct device *ci_dev; /* our device */ + u_int32_t ci_kern_cr3; /* U+K page table */ + u_int32_t ci_scratch; /* for U<-->K transition */ + +#define ci_PAGEALIGN ci_dev + struct device *ci_dev; /* our device */ struct cpu_info *ci_self; /* pointer to this structure */ struct schedstate_percpu ci_schedstate; /* scheduler state */ struct cpu_info *ci_next; /* next cpu */ @@ -117,6 +121,10 @@ struct cpu_info { u_int ci_acpi_proc_id; u_int32_t ci_randseed; + u_int32_t ci_kern_esp; /* kernel-only stack */ + u_int32_t ci_intr_esp; /* U<-->K trampoline stack */ + u_int32_t ci_user_cr3; /* U-K page table */ + #if defined(MULTIPROCESSOR) 
struct srp_hazard ci_srp_hazards[SRP_HAZARD_NUM]; #endif @@ -224,7 +232,10 @@ struct cpu_info { * the only CPU on uniprocessors), and the primary CPU is the * first CPU on the CPU info list. */ -extern struct cpu_info cpu_info_primary; +struct cpu_info_full; +extern struct cpu_info_full cpu_info_full_primary; +#define cpu_info_primary (*(struct cpu_info *)((char *)&cpu_info_full_primary + PAGE_SIZE*2 - offsetof(struct cpu_info, ci_PAGEALIGN))) + extern struct cpu_info *cpu_info_list; #define CPU_INFO_ITERATOR int diff --git a/sys/arch/i386/include/cpu_full.h b/sys/arch/i386/include/cpu_full.h new file mode 100644 index 00000000000..ef820a4fd2c --- /dev/null +++ b/sys/arch/i386/include/cpu_full.h @@ -0,0 +1,63 @@ +/* $OpenBSD: cpu_full.h,v 1.1 2018/04/11 15:44:08 bluhm Exp $ */ +/* + * Copyright (c) 2018 Philip Guenther <guenther@openbsd.org> + * Copyright (c) 2018 Hans-Joerg Hoexer <hshoexer@genua.de> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _MACHINE_CPU_FULL_H_ +#define _MACHINE_CPU_FULL_H_ + +#include <sys/param.h> /* offsetof, PAGE_SIZE */ +#include <machine/segments.h> +#include <machine/tss.h> + +struct cpu_info_full { + /* page mapped kRO in u-k */ + union { + struct i386tss u_tss; /* followed by gdt */ + char u_align[PAGE_SIZE]; + } cif_TSS_RO; +#define cif_tss cif_TSS_RO.u_tss + + /* start of page mapped kRW in u-k */ + uint32_t cif_tramp_stack[(PAGE_SIZE + - offsetof(struct cpu_info, ci_PAGEALIGN)) / sizeof(uint32_t)]; + + /* + * Beginning of this hangs over into the kRW page; rest is + * unmapped in u-k + */ + struct cpu_info cif_cpu; +} __aligned(PAGE_SIZE); + +/* idt and align shim must fit exactly in a page */ +CTASSERT(_ALIGN(sizeof(struct gate_descriptor) * NIDT) <= PAGE_SIZE); + +/* tss, align shim, and gdt must fit in a page */ +CTASSERT(_ALIGN(sizeof(struct i386tss)) + + sizeof(struct segment_descriptor) * NGDT < PAGE_SIZE); + +/* verify expected alignment */ +CTASSERT(offsetof(struct cpu_info_full, cif_cpu.ci_PAGEALIGN) % PAGE_SIZE == 0); + +/* verify total size is multiple of page size */ +CTASSERT(sizeof(struct cpu_info_full) % PAGE_SIZE == 0); + +extern struct cpu_info_full cpu_info_full_primary; + +/* Now make sure the cpu_info_primary macro is correct */ +CTASSERT(&cpu_info_primary - &cpu_info_full_primary.cif_cpu == 0); + +#endif /* _MACHINE_CPU_FULL_H_ */ diff --git a/sys/arch/i386/include/cpufunc.h b/sys/arch/i386/include/cpufunc.h index 3677ebec01c..194197d574a 100644 --- a/sys/arch/i386/include/cpufunc.h +++ b/sys/arch/i386/include/cpufunc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpufunc.h,v 1.27 2017/08/08 15:53:55 visa Exp $ */ +/* $OpenBSD: cpufunc.h,v 1.28 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: cpufunc.h,v 1.8 1994/10/27 04:15:59 cgd Exp $ */ /* @@ -289,5 +289,8 @@ breakpoint(void) void amd64_errata(struct cpu_info *); +struct cpu_info_full; +void 
cpu_enter_pages(struct cpu_info_full *); + #endif /* _KERNEL */ #endif /* !_MACHINE_CPUFUNC_H_ */ diff --git a/sys/arch/i386/include/frame.h b/sys/arch/i386/include/frame.h index 2220c30b7d3..7b3519ffa91 100644 --- a/sys/arch/i386/include/frame.h +++ b/sys/arch/i386/include/frame.h @@ -1,4 +1,4 @@ -/* $OpenBSD: frame.h,v 1.11 2016/02/26 09:29:20 mpi Exp $ */ +/* $OpenBSD: frame.h,v 1.12 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: frame.h,v 1.12 1995/10/11 04:20:08 mycroft Exp $ */ /*- @@ -103,6 +103,47 @@ struct intrframe { }; /* + * iret stack frame + */ +struct iretframe { + int irf_trapno; + int irf_err; + int irf_eip; + int irf_cs; + int irf_eflags; + int irf_esp; + int irf_ss; + /* below used when switching back to VM86 mode */ + int irf_vm86_es; + int irf_vm86_ds; + int irf_vm86_fs; + int irf_vm86_gs; +}; + +/* + * Trampoline stack frame + */ +struct trampframe { + int trf__deadbeef; + int trf__kern_esp; + int trf_fs; + int trf_eax; + int trf_ebp; + int trf_trapno; + int trf_err; + int trf_eip; + int trf_cs; + int trf_eflags; + int trf_esp; + int trf_ss; + /* below used when switching out of VM86 mode */ + int trf_vm86_es; + int trf_vm86_ds; + int trf_vm86_fs; + int trf_vm86_gs; +}; + +/* * Stack frame inside cpu_switch() */ struct switchframe { diff --git a/sys/arch/i386/include/gdt.h b/sys/arch/i386/include/gdt.h index 7435fbaa0b3..5ecc4565d29 100644 --- a/sys/arch/i386/include/gdt.h +++ b/sys/arch/i386/include/gdt.h @@ -1,4 +1,4 @@ -/* $OpenBSD: gdt.h,v 1.16 2018/03/31 13:45:03 bluhm Exp $ */ +/* $OpenBSD: gdt.h,v 1.17 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: gdt.h,v 1.7.10.6 2002/08/19 01:22:36 sommerfeld Exp $ */ /*- @@ -37,7 +37,6 @@ struct pcb; struct pmap; void gdt_alloc_cpu(struct cpu_info *); -int gdt_get_slot(void); void gdt_init(void); void gdt_init_cpu(struct cpu_info *); void gdt_reload_cpu(/* XXX struct cpu_info * */ void); diff --git a/sys/arch/i386/include/pcb.h b/sys/arch/i386/include/pcb.h index 267cfed4e89..57264d0e290 100644 --- a/sys/arch/i386/include/pcb.h +++ b/sys/arch/i386/include/pcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pcb.h,v 1.23 2018/03/31 13:45:03 bluhm Exp $ */ +/* $OpenBSD: pcb.h,v 1.24 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: pcb.h,v 1.21 1996/01/08 13:51:42 mycroft Exp $ */ /*- @@ -50,7 +50,7 @@ #include <machine/sysarch.h> /* - * Please not that pcb_savefpu must be aligend to 16 bytes. + * Please note that pcb_savefpu must be aligend to 16 bytes. 
*/ struct pcb { union savefpu pcb_savefpu; /* floating point state for FPU */ diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h index 8751e11be56..d8992fd9763 100644 --- a/sys/arch/i386/include/pmap.h +++ b/sys/arch/i386/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.83 2016/10/21 06:20:59 mlarkin Exp $ */ +/* $OpenBSD: pmap.h,v 1.84 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */ /* @@ -99,8 +99,18 @@ struct pmap { struct mutex pm_mtx; struct mutex pm_apte_mtx; - paddr_t pm_pdirpa; /* PA of PD (read-only after create) */ - vaddr_t pm_pdir; /* VA of PD (lck by object lock) */ + /* + * pm_pdir : VA of PD when executing in privileged mode + * (lock by objeckt lock) + * pm_pdirpa : PA of PD when executing in privileged mode, + * (read-only after create) + * pm_pdir_intel : VA of PD when executing on Intel CPU in + * usermode (no kernel mappings) + * pm_pdirpa_intel : PA of PD when executing on Intel CPU in + * usermode (no kernel mappings) + */ + paddr_t pm_pdirpa, pm_pdirpa_intel; + vaddr_t pm_pdir, pm_pdir_intel; int pm_pdirsize; /* PD size (4k vs 16k on PAE) */ struct uvm_object pm_obj; /* object (lck by object lock) */ LIST_ENTRY(pmap) pm_list; /* list (lck by pm_list lock) */ @@ -263,6 +273,7 @@ extern u_int32_t (*pmap_pte_bits_p)(vaddr_t); extern paddr_t (*pmap_pte_paddr_p)(vaddr_t); extern boolean_t (*pmap_clear_attrs_p)(struct vm_page *, int); extern int (*pmap_enter_p)(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); +extern void (*pmap_enter_special_p)(vaddr_t, paddr_t, vm_prot_t, u_int32_t); extern boolean_t (*pmap_extract_p)(pmap_t, vaddr_t, paddr_t *); extern vaddr_t (*pmap_growkernel_p)(vaddr_t); extern void (*pmap_page_remove_p)(struct vm_page *); @@ -281,6 +292,7 @@ u_int32_t pmap_pte_bits_pae(vaddr_t); paddr_t pmap_pte_paddr_pae(vaddr_t); boolean_t pmap_clear_attrs_pae(struct vm_page *, int); int pmap_enter_pae(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); +void pmap_enter_special_pae(vaddr_t, paddr_t, vm_prot_t, u_int32_t); boolean_t pmap_extract_pae(pmap_t, vaddr_t, paddr_t *); vaddr_t pmap_growkernel_pae(vaddr_t); void pmap_page_remove_pae(struct vm_page *); @@ -315,6 +327,7 @@ u_int32_t pmap_pte_bits_86(vaddr_t); paddr_t pmap_pte_paddr_86(vaddr_t); boolean_t pmap_clear_attrs_86(struct vm_page *, int); int pmap_enter_86(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); +void pmap_enter_special_86(vaddr_t, paddr_t, vm_prot_t, u_int32_t); boolean_t pmap_extract_86(pmap_t, vaddr_t, paddr_t *); vaddr_t pmap_growkernel_86(vaddr_t); void pmap_page_remove_86(struct vm_page *); @@ -435,6 +448,12 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) return (*pmap_enter_p)(pmap, va, pa, prot, flags); } +__inline static void +pmap_enter_special(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int32_t flags) +{ + (*pmap_enter_special_p)(va, pa, prot, flags); +} + __inline static boolean_t pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pa) { diff --git a/sys/arch/i386/isa/icu.s b/sys/arch/i386/isa/icu.s index 022e8d3e3dc..102878407dc 100644 --- a/sys/arch/i386/isa/icu.s +++ b/sys/arch/i386/isa/icu.s @@ -1,4 +1,4 @@ -/* $OpenBSD: icu.s,v 1.33 2015/07/16 05:10:14 guenther Exp $ */ +/* $OpenBSD: icu.s,v 1.34 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: icu.s,v 1.45 1996/01/07 03:59:34 mycroft Exp $ */ /*- @@ -44,7 +44,7 @@ _C_LABEL(imen): * esi - address to resume loop at * edi - scratch for Xsoftnet */ -IDTVEC(spllower) +KIDTVEC(spllower) pushl %ebx pushl %esi pushl %edi @@ -76,7 +76,7 @@ 
IDTVEC(spllower) * esi - address to resume loop at * edi - scratch for Xsoftnet */ -IDTVEC(doreti) +KIDTVEC(doreti) popl %ebx # get previous priority movl $1f,%esi # address to resume loop at 1: movl %ebx,%eax @@ -108,14 +108,18 @@ IDTVEC(doreti) addl $4,%esp cli jmp 2b -3: INTRFASTEXIT +3: +#ifdef DIAGNOSTIC + movl $0xf9,%esi +#endif + INTRFASTEXIT /* * Soft interrupt handlers */ -IDTVEC(softtty) +KIDTVEC(softtty) movl $IPL_SOFTTTY,%eax movl %eax,CPL sti @@ -124,7 +128,7 @@ IDTVEC(softtty) addl $4,%esp jmp *%esi -IDTVEC(softnet) +KIDTVEC(softnet) movl $IPL_SOFTNET,%eax movl %eax,CPL sti @@ -134,7 +138,7 @@ IDTVEC(softnet) jmp *%esi #undef DONETISR -IDTVEC(softclock) +KIDTVEC(softclock) movl $IPL_SOFTCLOCK,%eax movl %eax,CPL sti diff --git a/sys/arch/i386/isa/npx.c b/sys/arch/i386/isa/npx.c index 187887a50dc..cf4734ed2da 100644 --- a/sys/arch/i386/isa/npx.c +++ b/sys/arch/i386/isa/npx.c @@ -1,4 +1,4 @@ -/* $OpenBSD: npx.c,v 1.68 2018/03/31 22:52:30 bluhm Exp $ */ +/* $OpenBSD: npx.c,v 1.69 2018/04/11 15:44:08 bluhm Exp $ */ /* $NetBSD: npx.c,v 1.57 1996/05/12 23:12:24 mycroft Exp $ */ #if 0 @@ -131,8 +131,10 @@ enum npx_type { }; static enum npx_type npx_type; -static volatile u_int npx_intrs_while_probing; -static volatile u_int npx_traps_while_probing; +static volatile u_int npx_intrs_while_probing + __attribute__((section(".kudata"))); +static volatile u_int npx_traps_while_probing + __attribute__((section(".kudata"))); extern int i386_fpu_present; extern int i386_fpu_exception; |
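
Finally, the npx.c and pmap.c hunks above show the pattern used for data the trampolines must still reach after the kernel is unmapped: tagging individual variables for the shared .kudata section, which the ld.script hunk collects between __kudata_start and __kudata_end. A minimal stand-alone illustration follows; the variable name here is made up, while the real kernel applies the attribute to tlb_shoot_wait, tlb_shoot_addr1/2 and the npx probing counters.

```c
/*
 * Illustration of the .kudata placement used in the diff above.
 * example_kudata_flag is hypothetical; outside the kernel's
 * ld.script the named section is simply emitted alongside .data,
 * so this also builds as plain C with gcc or clang.
 */
volatile unsigned int example_kudata_flag
    __attribute__((section(".kudata")));

/* machdep.c's enter_shared_special_pages() later walks the section
 * using the symbols provided by the ld.script hunk: */
extern char __kudata_start[], __kudata_end[], __kernel_kudata_phys[];

int
main(void)
{
	example_kudata_flag = 1;
	return (int)example_kudata_flag - 1;	/* returns 0 */
}
```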