1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
/* $OpenBSD: fpu.h,v 1.18 2023/05/22 00:39:57 guenther Exp $ */
/* $NetBSD: fpu.h,v 1.1 2003/04/26 18:39:40 fvdl Exp $ */
#ifndef _MACHINE_FPU_H_
#define _MACHINE_FPU_H_
#include <sys/types.h>
/*
 * If the CPU supports xsave/xrstor then we use them so that we can provide
 * AVX support.  Otherwise we require fxsave/fxrstor, as the SSE registers
 * are part of the ABI for passing floating point values.
 * While fxsave/fxrstor only require 16-byte alignment for the save area,
 * xsave/xrstor require the save area to have 64-byte alignment.
 */
/*
 * Fixed-layout register save image.  The declared field sizes add up to
 * exactly 512 bytes and every field sits at a naturally aligned offset, so
 * the offsets noted below follow directly from the declarations.
 *
 * NOTE(review): the field names appear to mirror the 64-bit FXSAVE memory
 * image described in the Intel SDM (FCW, FSW, FTW, FOP, RIP, RDP, MXCSR,
 * MXCSR_MASK); confirm per-field semantics against the manual before
 * relying on them.  Do not reorder or resize fields.
 */
struct fxsave64 {
u_int16_t fx_fcw; /* offset 0, 2 bytes */
u_int16_t fx_fsw; /* offset 2, 2 bytes */
u_int8_t fx_ftw; /* offset 4, 1 byte */
u_int8_t fx_unused1; /* offset 5, 1 byte of filler */
u_int16_t fx_fop; /* offset 6, 2 bytes */
u_int64_t fx_rip; /* offset 8, 8 bytes */
u_int64_t fx_rdp; /* offset 16, 8 bytes */
u_int32_t fx_mxcsr; /* offset 24, 4 bytes */
u_int32_t fx_mxcsr_mask; /* offset 28, 4 bytes */
/* offset 32: 8 slots of 16 bytes each (128 bytes) */
u_int64_t fx_st[8][2]; /* 8 normal FP regs */
/* offset 160: 16 slots of 16 bytes each (256 bytes) */
u_int64_t fx_xmm[16][2]; /* 16 SSE2 registers */
u_int8_t fx_unused3[96]; /* offset 416, filler up to byte 512 */
} __packed;
/*
 * 64-byte header (8 + 8 + 8 + 40 bytes) that struct savefpu places directly
 * after its struct fxsave64 member.
 *
 * NOTE(review): presumably this is the XSAVE header from the Intel SDM,
 * with xstate_bv recording which state components the image holds and
 * xstate_xcomp_bv describing the compacted format; confirm against the
 * manual.
 */
struct xstate_hdr {
uint64_t xstate_bv; /* offset 0 */
uint64_t xstate_xcomp_bv; /* offset 8 */
uint8_t xstate_rsrv0[8]; /* offset 16, reserved */
uint8_t xstate_rsrv[40]; /* offset 24, reserved; pads header to 64 bytes */
} __packed;
/*
 * Complete per-context FPU save area: a struct fxsave64 image, then a
 * struct xstate_hdr, then 16 further 16-byte slots.  With the member sizes
 * declared in this file that is 512 + 64 + 256 = 832 bytes, with fp_xstate
 * at offset 512 and fp_ymm at offset 576.
 *
 * NOTE(review): this declaration carries no alignment attribute, while the
 * comment at the top of this file says xsave/xrstor need a 64-byte-aligned
 * save area; presumably whoever allocates a struct savefpu guarantees the
 * alignment -- verify at the allocation sites.
 */
struct savefpu {
struct fxsave64 fp_fxsave; /* see above */
struct xstate_hdr fp_xstate; /* header following the fxsave image */
/* NOTE(review): presumably the upper 128 bits of YMM0-15 (AVX) -- confirm */
u_int64_t fp_ymm[16][2];
};
/*
 * The i387 defaults to Intel extended precision mode and round to nearest,
 * with all exceptions masked.
 */
/* Initial x87 control word: bits 0-6 and bits 8-9 set, all others clear. */
#define __INITIAL_NPXCW__ 0x037f
/*
 * Initial MXCSR value: bits 7-12 set, all others clear.
 * NOTE(review): presumably these are the SSE exception-mask bits, i.e. the
 * SSE counterpart of "all exceptions masked" above -- confirm in the SDM.
 */
#define __INITIAL_MXCSR__ 0x1f80
/*
 * Every one of the low 16 bits except bit 6.
 * NOTE(review): presumably the default set of MXCSR bits that may be
 * written, used when the CPU does not report its own mask -- confirm
 * against the code that initializes fpu_mxcsr_mask.
 */
#define __INITIAL_MXCSR_MASK__ 0xffbf
#ifdef _KERNEL
/*
* XXX
*/
/*
 * Forward declarations so the prototypes below can take pointers without
 * pulling in the full definitions.  (struct trapframe is not referenced by
 * any declaration visible in this header.)
 */
struct trapframe;
struct cpu_info;
/*
 * Globals defined elsewhere in the kernel.
 * NOTE(review): going by the names, presumably the size in bytes of the
 * save area in use, the MXCSR bits accepted by this CPU, and the set of
 * state components handed to xsave/xrstor -- confirm at the definitions.
 */
extern size_t fpu_save_len;
extern uint32_t fpu_mxcsr_mask;
extern uint64_t xsave_mask;
/*
 * Kernel FPU entry points, all implemented outside this header.  Their
 * behavior cannot be determined from here; consult the implementations.
 */
void fpuinit(struct cpu_info *);
int fputrap(int _type);
void fpusave(struct savefpu *);
void fpusavereset(struct savefpu *);
void fpu_kernel_enter(void);
void fpu_kernel_exit(void);
/* NOTE(review): the int result is presumably a success/fault flag -- confirm */
int xrstor_user(struct savefpu *_addr, uint64_t _mask);
/*
 * Expands to a call of xrstor_user() on proc0's PCB save area with
 * xsave_mask, and evaluates to that call's int result.  proc0 and the
 * p_addr->u_pcb.pcb_savefpu member chain are not declared in this header,
 * so they must be in scope wherever the macro is used.
 */
#define fpureset() \
xrstor_user(&proc0.p_addr->u_pcb.pcb_savefpu, xsave_mask)
/* NOTE(review): the int result is presumably a success/fault flag -- confirm */
int xsetbv_user(uint32_t _reg, uint64_t _mask);
/*
 * One-instruction wrappers around x87/SSE control instructions.
 *
 * fninit() and fwait() take no operands.  fxsave(), ldmxcsr() and fldcw()
 * take a pointer `addr` to the instruction's memory operand; the pointer is
 * evaluated exactly once.  The argument is parenthesized before being
 * dereferenced so that expressions such as `buf + 1` or `cond ? a : b` are
 * dereferenced as a whole rather than being split by operator precedence.
 *
 * The compiler only knows about sizeof(*(addr)) bytes of the operand, so
 * pass a pointer whose pointee type covers everything the instruction
 * touches (e.g. a full save-area struct for fxsave()).
 */
#define fninit()	__asm("fninit")
#define fwait()		__asm("fwait")
/* should be fxsave64, but where we use this it doesn't matter */
#define fxsave(addr)	__asm("fxsave %0" : "=m" (*(addr)))
#define ldmxcsr(addr)	__asm("ldmxcsr %0" : : "m" (*(addr)))
#define fldcw(addr)	__asm("fldcw %0" : : "m" (*(addr)))
/*
 * Execute the xsave instruction with *addr as its memory operand.
 *
 * The 64-bit `mask` is handed to the instruction as two 32-bit halves:
 * the low half through the "a" register constraint and the high half
 * through the "d" register constraint.  The "memory" clobber keeps the
 * compiler from caching memory contents across the instruction.
 */
static inline void
xsave(struct savefpu *addr, uint64_t mask)
{
	const uint32_t mask_lo = (uint32_t)mask;
	const uint32_t mask_hi = (uint32_t)(mask >> 32);

	/* should be xsave64, but where we use this it doesn't matter */
	__asm volatile("xsave %0"
	    : "=m" (*addr)
	    : "a" (mask_lo), "d" (mask_hi)
	    : "memory");
}
#endif
#endif /* _MACHINE_FPU_H_ */
|