summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Voutila <dv@cvs.openbsd.org>2022-11-10 11:46:40 +0000
committerDave Voutila <dv@cvs.openbsd.org>2022-11-10 11:46:40 +0000
commit3195206a8366c6f23ef6b05359aef42611620862 (patch)
tree1ec5f674d1d23ec1b02df4e1934d4ddce68728ec
parent95e0375e34b18c0acdf59d95b25146b33e267cd7 (diff)
vmd(8): import mmio decode and emulation, disabled for now.
The initial mmio support for vmd adds support for only specific MOV and MOVZX instructions. Plan is to begin iterating in-tree on other missing pieces. All functionality is gated behind an #if for now. Only change to vmm(4) is reordering register #define's in vmmvar.h. ok mlarkin@
-rw-r--r--sys/arch/amd64/include/vmmvar.h33
-rw-r--r--usr.sbin/vmd/Makefile4
-rw-r--r--usr.sbin/vmd/mmio.c1044
-rw-r--r--usr.sbin/vmd/mmio.h138
-rw-r--r--usr.sbin/vmd/vm.c97
5 files changed, 1283 insertions, 33 deletions
diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index 320cbd3db3f..94feca15471 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmmvar.h,v 1.83 2022/11/09 17:53:12 dv Exp $ */
+/* $OpenBSD: vmmvar.h,v 1.84 2022/11/10 11:46:39 dv Exp $ */
/*
* Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
*
@@ -377,22 +377,23 @@ struct vcpu_segment_info {
uint64_t vsi_base;
};
+/* The GPRS are ordered to assist instruction decode. */
#define VCPU_REGS_RAX 0
-#define VCPU_REGS_RBX 1
-#define VCPU_REGS_RCX 2
-#define VCPU_REGS_RDX 3
-#define VCPU_REGS_RSI 4
-#define VCPU_REGS_RDI 5
-#define VCPU_REGS_R8 6
-#define VCPU_REGS_R9 7
-#define VCPU_REGS_R10 8
-#define VCPU_REGS_R11 9
-#define VCPU_REGS_R12 10
-#define VCPU_REGS_R13 11
-#define VCPU_REGS_R14 12
-#define VCPU_REGS_R15 13
-#define VCPU_REGS_RSP 14
-#define VCPU_REGS_RBP 15
+#define VCPU_REGS_RCX 1
+#define VCPU_REGS_RDX 2
+#define VCPU_REGS_RBX 3
+#define VCPU_REGS_RSP 4
+#define VCPU_REGS_RBP 5
+#define VCPU_REGS_RSI 6
+#define VCPU_REGS_RDI 7
+#define VCPU_REGS_R8 8
+#define VCPU_REGS_R9 9
+#define VCPU_REGS_R10 10
+#define VCPU_REGS_R11 11
+#define VCPU_REGS_R12 12
+#define VCPU_REGS_R13 13
+#define VCPU_REGS_R14 14
+#define VCPU_REGS_R15 15
#define VCPU_REGS_RIP 16
#define VCPU_REGS_RFLAGS 17
#define VCPU_REGS_NGPRS (VCPU_REGS_RFLAGS + 1)
diff --git a/usr.sbin/vmd/Makefile b/usr.sbin/vmd/Makefile
index 42e94fd394b..d0e7d0c2fb1 100644
--- a/usr.sbin/vmd/Makefile
+++ b/usr.sbin/vmd/Makefile
@@ -1,11 +1,11 @@
-# $OpenBSD: Makefile,v 1.27 2022/09/13 10:28:19 martijn Exp $
+# $OpenBSD: Makefile,v 1.28 2022/11/10 11:46:39 dv Exp $
.if ${MACHINE} == "amd64"
PROG= vmd
SRCS= vmd.c control.c log.c priv.c proc.c config.c vmm.c
SRCS+= vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c
-SRCS+= ns8250.c i8253.c dhcp.c packet.c
+SRCS+= ns8250.c i8253.c dhcp.c packet.c mmio.c
SRCS+= parse.y atomicio.c vioscsi.c vioraw.c vioqcow2.c fw_cfg.c
SRCS+= vm_agentx.c
diff --git a/usr.sbin/vmd/mmio.c b/usr.sbin/vmd/mmio.c
new file mode 100644
index 00000000000..7348fbf0a11
--- /dev/null
+++ b/usr.sbin/vmd/mmio.c
@@ -0,0 +1,1044 @@
+/* $OpenBSD: mmio.c,v 1.1 2022/11/10 11:46:39 dv Exp $ */
+
+/*
+ * Copyright (c) 2022 Dave Voutila <dv@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <machine/specialreg.h>
+
+#include "vmd.h"
+#include "mmio.h"
+
+#define MMIO_DEBUG 0
+
+extern char* __progname;
+
+/* Read cursor over the raw bytes of the instruction being decoded. */
+struct x86_decode_state {
+ uint8_t s_bytes[15]; /* Copy of the instruction bytes */
+ size_t s_len; /* Number of valid bytes in s_bytes */
+ size_t s_idx; /* Index of the next byte to be read */
+};
+
+enum decode_result {
+ DECODE_ERROR = 0, /* Something went wrong. */
+ DECODE_DONE, /* Decode success and no more work needed. */
+ DECODE_MORE, /* Decode success and more work required. */
+};
+
+static const char *str_cpu_mode(int);
+static const char *str_decode_res(enum decode_result);
+static const char *str_opcode(struct x86_opcode *);
+static const char *str_operand_enc(struct x86_opcode *);
+static const char *str_reg(int);
+static const char *str_sreg(int);
+static int detect_cpu_mode(struct vcpu_reg_state *);
+
+static enum decode_result decode_prefix(struct x86_decode_state *,
+ struct x86_insn *);
+static enum decode_result decode_opcode(struct x86_decode_state *,
+ struct x86_insn *);
+static enum decode_result decode_modrm(struct x86_decode_state *,
+ struct x86_insn *);
+static int get_modrm_reg(struct x86_insn *);
+static int get_modrm_addr(struct x86_insn *, struct vcpu_reg_state *vrs);
+static enum decode_result decode_disp(struct x86_decode_state *,
+ struct x86_insn *);
+static enum decode_result decode_sib(struct x86_decode_state *,
+ struct x86_insn *);
+static enum decode_result decode_imm(struct x86_decode_state *,
+ struct x86_insn *);
+
+static enum decode_result peek_byte(struct x86_decode_state *, uint8_t *);
+static enum decode_result next_byte(struct x86_decode_state *, uint8_t *);
+static enum decode_result next_value(struct x86_decode_state *, size_t,
+ uint64_t *);
+static int is_valid_state(struct x86_decode_state *, const char *);
+
+static int emulate_mov(struct x86_insn *, struct vm_exit *);
+static int emulate_movzx(struct x86_insn *, struct vm_exit *);
+
+/*
+ * Opcode lookup tables.
+ *
+ * Every table below is indexed directly by an opcode byte, which may hold any
+ * value from 0x00 through 0xFF, so each table must have 256 entries. Entries
+ * without an explicit initializer are zero, i.e. OP_UNKNOWN or OP_ENC_UNKNOWN.
+ */
+
+/* Lookup table for 1-byte opcodes, in opcode alphabetical order. */
+const enum x86_opcode_type x86_1byte_opcode_tbl[256] = {
+	/* MOV */
+	[0x88] = OP_MOV,
+	[0x89] = OP_MOV,
+	[0x8A] = OP_MOV,
+	[0x8B] = OP_MOV,
+	[0x8C] = OP_MOV,
+	[0xA0] = OP_MOV,
+	[0xA1] = OP_MOV,
+	[0xA2] = OP_MOV,
+	[0xA3] = OP_MOV,
+
+	/* MOVS */
+	[0xA4] = OP_UNSUPPORTED,
+	[0xA5] = OP_UNSUPPORTED,
+
+	/* First byte of a two-byte opcode. */
+	[ESCAPE] = OP_TWO_BYTE,
+};
+
+/* Lookup table for 1-byte operand encodings, in opcode alphabetical order. */
+const enum x86_operand_enc x86_1byte_operand_enc_tbl[256] = {
+	/* MOV */
+	[0x88] = OP_ENC_MR,
+	[0x89] = OP_ENC_MR,
+	[0x8A] = OP_ENC_RM,
+	[0x8B] = OP_ENC_RM,
+	[0x8C] = OP_ENC_MR,
+	[0xA0] = OP_ENC_FD,
+	[0xA1] = OP_ENC_FD,
+	[0xA2] = OP_ENC_TD,
+	[0xA3] = OP_ENC_TD,
+
+	/* MOVS */
+	[0xA4] = OP_ENC_ZO,
+	[0xA5] = OP_ENC_ZO,
+};
+
+/* Lookup table for 2-byte opcodes, indexed by the byte following ESCAPE. */
+const enum x86_opcode_type x86_2byte_opcode_tbl[256] = {
+	/* MOVZX */
+	[0xB6] = OP_MOVZX,
+	[0xB7] = OP_MOVZX,
+};
+
+/* Lookup table for 2-byte operand encodings, indexed like the table above. */
+const enum x86_operand_enc x86_2byte_operand_enc_table[256] = {
+	/* MOVZX */
+	[0xB6] = OP_ENC_RM,
+	[0xB7] = OP_ENC_RM,
+};
+
+/*
+ * peek_byte
+ *
+ * Look at the next byte of the instruction stream without consuming it. The
+ * decode state is not modified.
+ *
+ * Return values:
+ *  DECODE_DONE: a byte was found and it is the last one in the stream
+ *  DECODE_MORE: a byte was found and more bytes follow it
+ *  DECODE_ERROR: state is NULL or exhausted; no byte was found and *byte is
+ *      left unchanged
+ */
+static enum decode_result
+peek_byte(struct x86_decode_state *state, uint8_t *byte)
+{
+	if (state == NULL || state->s_idx == state->s_len)
+		return (DECODE_ERROR);
+
+	/* The caller may pass NULL when only the stream status is wanted. */
+	if (byte != NULL)
+		*byte = state->s_bytes[state->s_idx];
+
+	return ((state->s_idx + 1 == state->s_len) ? DECODE_DONE : DECODE_MORE);
+}
+
+/*
+ * next_byte
+ *
+ * Consume the next byte of the instruction stream, advancing the read
+ * position held in the decode state.
+ *
+ * Return values:
+ *  DECODE_DONE: a byte was consumed and it was the last one in the stream
+ *  DECODE_MORE: a byte was consumed and more bytes remain
+ *  DECODE_ERROR: no byte was available; state and *byte are left unchanged
+ */
+static enum decode_result
+next_byte(struct x86_decode_state *state, uint8_t *byte)
+{
+	uint8_t val;
+
+	/* peek_byte() performs all validation of the state for us. */
+	if (peek_byte(state, &val) == DECODE_ERROR)
+		return (DECODE_ERROR);
+
+	state->s_idx++;
+	if (byte != NULL)
+		*byte = val;
+
+	if (state->s_idx < state->s_len)
+		return (DECODE_MORE);
+	return (DECODE_DONE);
+}
+
+/*
+ * next_value
+ *
+ * Consume the next `n' bytes of the instruction stream and combine them into
+ * a single uint64_t. The first byte read becomes the least significant byte
+ * (little-endian); bytes beyond `n' are zero.
+ *
+ * The value is assembled with shifts rather than by casting a byte array to
+ * uint64_t *, which would violate alignment and aliasing rules.
+ *
+ * Return values:
+ *  DECODE_DONE: value was read and the stream is now exhausted
+ *  DECODE_MORE: value was read and more bytes remain
+ *  DECODE_ERROR: value is NULL, n is 0 or larger than 8, or the stream ran
+ *      out of bytes; *value is left unchanged
+ */
+static enum decode_result
+next_value(struct x86_decode_state *state, size_t n, uint64_t *value)
+{
+	enum decode_result res = DECODE_ERROR;
+	uint64_t v = 0;
+	uint8_t byte;
+	size_t i;
+
+	if (value == NULL)
+		return (DECODE_ERROR);
+
+	if (n == 0 || n > sizeof(v))
+		return (DECODE_ERROR);
+
+	for (i = 0; i < n; i++) {
+		res = next_byte(state, &byte);
+		if (res == DECODE_ERROR)
+			return (DECODE_ERROR);
+		v |= (uint64_t)byte << (8 * i);
+	}
+
+	*value = v;
+
+	return (res);
+}
+
+/*
+ * is_valid_state
+ *
+ * Sanity check the decode state before a decode step uses it. A warning that
+ * names the caller (fn_name, or this function if fn_name is NULL) is logged
+ * for the first failed check.
+ *
+ * Returns:
+ *  1: if state is valid
+ *  0: if state is NULL, its length exceeds the byte buffer, or its index
+ *     does not leave at least one unread byte
+ */
+static int
+is_valid_state(struct x86_decode_state *state, const char *fn_name)
+{
+	const char *caller = fn_name;
+
+	if (caller == NULL)
+		caller = __func__;
+
+	if (state == NULL) {
+		log_warnx("%s: null state", caller);
+		return (0);
+	}
+
+	if (state->s_len > sizeof(state->s_bytes)) {
+		log_warnx("%s: invalid length", caller);
+		return (0);
+	}
+
+	if (state->s_idx + 1 > state->s_len) {
+		log_warnx("%s: invalid index", caller);
+		return (0);
+	}
+
+	return (1);
+}
+
+/*
+ * Debug helpers.
+ *
+ * NOTE(review): MMIO_DEBUG is #define'd (to 0) near the top of this file, so
+ * this #ifdef is satisfied and the helpers are always compiled. If the intent
+ * is to disable them when the value is 0, every MMIO_DEBUG guard in this file
+ * needs to become "#if MMIO_DEBUG" together.
+ */
+#ifdef MMIO_DEBUG
+/*
+ * dump_regs
+ *
+ * Log every general purpose register (including RIP and RFLAGS) and every
+ * segment register found in the given vcpu register state.
+ */
+static void
+dump_regs(struct vcpu_reg_state *vrs)
+{
+ size_t i;
+ struct vcpu_segment_info *vsi;
+
+ for (i = 0; i < VCPU_REGS_NGPRS; i++)
+ log_info("%s: %s 0x%llx", __progname, str_reg(i),
+ vrs->vrs_gprs[i]);
+
+ for (i = 0; i < VCPU_REGS_NSREGS; i++) {
+ vsi = &vrs->vrs_sregs[i];
+ log_info("%s: %s { sel: 0x%04x, lim: 0x%08x, ar: 0x%08x, "
+ "base: 0x%llx }", __progname, str_sreg(i),
+ vsi->vsi_sel, vsi->vsi_limit, vsi->vsi_ar, vsi->vsi_base);
+ }
+}
+
+/*
+ * dump_insn
+ *
+ * Log a one-line summary of a (possibly partially) decoded instruction:
+ * opcode, operand encoding, length, ModR/M byte, decoded register, computed
+ * guest virtual address and SIB byte.
+ */
+static void
+dump_insn(struct x86_insn *insn)
+{
+ log_info("instruction { %s, enc=%s, len=%d, mod=0x%02x, ("
+ "reg=%s, addr=0x%lx) sib=0x%02x }",
+ str_opcode(&insn->insn_opcode),
+ str_operand_enc(&insn->insn_opcode), insn->insn_bytes_len,
+ insn->insn_modrm, str_reg(insn->insn_reg),
+ insn->insn_gva, insn->insn_sib);
+}
+#endif /* MMIO_DEBUG */
+
+/*
+ * str_cpu_mode
+ *
+ * Return a printable name for a VMM_CPU_MODE_* value, or "UNKNOWN" for any
+ * value without a dedicated name.
+ */
+static const char *
+str_cpu_mode(int mode)
+{
+	switch (mode) {
+	case VMM_CPU_MODE_REAL: return "REAL";
+	case VMM_CPU_MODE_PROT: return "PROT";
+	case VMM_CPU_MODE_PROT32: return "PROT32";
+	case VMM_CPU_MODE_COMPAT: return "COMPAT";
+	case VMM_CPU_MODE_LONG: return "LONG";
+	default: return "UNKNOWN";
+	}
+}
+
+/*
+ * str_decode_res
+ *
+ * Return a printable name for a decode_result value.
+ */
+__unused static const char *
+str_decode_res(enum decode_result res)
+{
+	if (res == DECODE_DONE)
+		return ("DONE");
+	if (res == DECODE_MORE)
+		return ("MORE");
+	if (res == DECODE_ERROR)
+		return ("ERROR");
+	return ("UNKNOWN");
+}
+
+/*
+ * str_opcode
+ *
+ * Return a printable mnemonic for the opcode's type. Types without a
+ * mnemonic (including OP_UNKNOWN and OP_TWO_BYTE) yield "UNKNOWN".
+ */
+static const char *
+str_opcode(struct x86_opcode *opcode)
+{
+	static const char *const names[] = {
+		[OP_IN] = "IN",
+		[OP_INS] = "INS",
+		[OP_MOV] = "MOV",
+		[OP_MOVZX] = "MOVZX",
+		[OP_OUT] = "OUT",
+		[OP_OUTS] = "OUTS",
+		[OP_UNSUPPORTED] = "UNSUPPORTED",
+	};
+	const size_t count = sizeof(names) / sizeof(names[0]);
+	const unsigned int idx = (unsigned int)opcode->op_type;
+
+	/* Gaps in the table are NULL and fall through to the default. */
+	if (idx < count && names[idx] != NULL)
+		return (names[idx]);
+	return ("UNKNOWN");
+}
+
+/*
+ * str_operand_enc
+ *
+ * Return the short name of the opcode's operand encoding, or "UNKNOWN" when
+ * the encoding has no name.
+ */
+static const char *
+str_operand_enc(struct x86_opcode *opcode)
+{
+	static const char *const names[] = {
+		[OP_ENC_I] = "I",
+		[OP_ENC_MI] = "MI",
+		[OP_ENC_MR] = "MR",
+		[OP_ENC_RM] = "RM",
+		[OP_ENC_FD] = "FD",
+		[OP_ENC_TD] = "TD",
+		[OP_ENC_OI] = "OI",
+		[OP_ENC_ZO] = "ZO",
+	};
+	const size_t count = sizeof(names) / sizeof(names[0]);
+	const unsigned int idx = (unsigned int)opcode->op_encoding;
+
+	/* Gaps in the table are NULL and fall through to the default. */
+	if (idx < count && names[idx] != NULL)
+		return (names[idx]);
+	return ("UNKNOWN");
+}
+
+/*
+ * str_reg
+ *
+ * Return a printable name for a VCPU_REGS_* general purpose register index
+ * (RIP and RFLAGS included), or "UNKNOWN" for anything else. R8 and R9 are
+ * padded with a leading space so that they line up with the other names.
+ */
+static const char *
+str_reg(int reg)
+{
+	static const char *const names[] = {
+		[VCPU_REGS_RAX] = "RAX",
+		[VCPU_REGS_RCX] = "RCX",
+		[VCPU_REGS_RDX] = "RDX",
+		[VCPU_REGS_RBX] = "RBX",
+		[VCPU_REGS_RSP] = "RSP",
+		[VCPU_REGS_RBP] = "RBP",
+		[VCPU_REGS_RSI] = "RSI",
+		[VCPU_REGS_RDI] = "RDI",
+		[VCPU_REGS_R8] = " R8",
+		[VCPU_REGS_R9] = " R9",
+		[VCPU_REGS_R10] = "R10",
+		[VCPU_REGS_R11] = "R11",
+		[VCPU_REGS_R12] = "R12",
+		[VCPU_REGS_R13] = "R13",
+		[VCPU_REGS_R14] = "R14",
+		[VCPU_REGS_R15] = "R15",
+		[VCPU_REGS_RIP] = "RIP",
+		[VCPU_REGS_RFLAGS] = "RFLAGS",
+	};
+	const size_t count = sizeof(names) / sizeof(names[0]);
+
+	if (reg >= 0 && (size_t)reg < count && names[reg] != NULL)
+		return (names[reg]);
+	return ("UNKNOWN");
+}
+
+/*
+ * str_sreg
+ *
+ * Return a printable name for a VCPU_REGS_* segment register index, or
+ * "UNKNOWN" for anything else.
+ */
+static const char *
+str_sreg(int sreg)
+{
+	switch (sreg) {
+	case VCPU_REGS_CS: return "CS";
+	case VCPU_REGS_DS: return "DS";
+	case VCPU_REGS_ES: return "ES";
+	case VCPU_REGS_FS: return "FS";
+	case VCPU_REGS_GS: return "GS";
+	case VCPU_REGS_SS: return "SS";
+	case VCPU_REGS_LDTR: return "LDTR";
+	case VCPU_REGS_TR: return "TR";
+	default: return "UNKNOWN";
+	}
+}
+
+/*
+ * detect_cpu_mode
+ *
+ * Work out the guest's current cpu mode from CR0, CR4, EFER, RFLAGS and the
+ * CS access rights held in the vcpu register state.
+ *
+ * Returns one of VMM_CPU_MODE_REAL, VMM_CPU_MODE_LONG, VMM_CPU_MODE_COMPAT or
+ * VMM_CPU_MODE_PROT32, or VMM_CPU_MODE_UNKNOWN when the mode is unsupported
+ * or cannot be determined.
+ */
+static int
+detect_cpu_mode(struct vcpu_reg_state *vrs)
+{
+	uint64_t cr0, cr4, cs, efer, rflags;
+	int long_active;
+
+	/* Without protection enabled we can only be in real mode. */
+	cr0 = vrs->vrs_crs[VCPU_REGS_CR0];
+	if ((cr0 & CR0_PE) == 0)
+		return (VMM_CPU_MODE_REAL);
+
+	cr4 = vrs->vrs_crs[VCPU_REGS_CR4];
+	cs = vrs->vrs_sregs[VCPU_REGS_CS].vsi_ar;
+	efer = vrs->vrs_msrs[VCPU_REGS_EFER];
+	rflags = vrs->vrs_gprs[VCPU_REGS_RFLAGS];
+
+	long_active = (efer & EFER_LME) && (cr4 & CR4_PAE) && (cr0 & CR0_PG);
+
+	/* Long mode proper: CS.L set, and CS.D must then be clear. */
+	if (long_active && (cs & CS_L)) {
+		if (cs & CS_D) {
+			log_warnx("%s: invalid cpu mode", __progname);
+			return (VMM_CPU_MODE_UNKNOWN);
+		}
+		return (VMM_CPU_MODE_LONG);
+	}
+
+	/* Compatibility modes: CS.L clear. XXX Add Compat32 mode */
+	if (long_active)
+		return ((cs & CS_D) ? VMM_CPU_MODE_UNKNOWN :
+		    VMM_CPU_MODE_COMPAT);
+
+	/* 32-bit Protected Mode. */
+	if (cs & CS_D)
+		return (VMM_CPU_MODE_PROT32);
+
+	/* Virtual 8086 mode. XXX add Virtual8086 mode */
+	if (rflags & EFLAGS_VM) {
+		log_warnx("%s: Virtual 8086 mode", __progname);
+		return (VMM_CPU_MODE_UNKNOWN);
+	}
+
+	/* Can't determine mode. */
+	log_warnx("%s: invalid cpu mode", __progname);
+	return (VMM_CPU_MODE_UNKNOWN);
+}
+
+/*
+ * decode_prefix
+ *
+ * Consume any legacy and REX prefix bytes at the current stream position and
+ * record them in insn->insn_prefix. Decoding stops, without consuming it, at
+ * the first byte that is not a recognized prefix.
+ *
+ * Return values:
+ *  DECODE_MORE: prefixes (if any) were consumed and a non-prefix byte follows
+ *  DECODE_ERROR: invalid arguments or state, a VEX prefix was found, a byte
+ *      in the REX range was found outside of long mode, or the stream ended
+ *      before a non-prefix byte was seen
+ */
+static enum decode_result
+decode_prefix(struct x86_decode_state *state, struct x86_insn *insn)
+{
+	enum decode_result res = DECODE_ERROR;
+	struct x86_prefix *prefix;
+	uint8_t byte;
+
+	/* Callers compare against DECODE_ERROR, so never return a bare -1. */
+	if (!is_valid_state(state, __func__) || insn == NULL)
+		return (DECODE_ERROR);
+
+	prefix = &insn->insn_prefix;
+	memset(prefix, 0, sizeof(*prefix));
+
+	/*
+	 * Decode prefixes. The last of its kind wins. The behavior is undefined
+	 * in the Intel SDM (see Vol 2, 2.1.1 Instruction Prefixes.)
+	 */
+	while ((res = peek_byte(state, &byte)) != DECODE_ERROR) {
+		switch (byte) {
+		case LEG_1_LOCK:
+		case LEG_1_REPNE:
+		case LEG_1_REP:
+			prefix->pfx_group1 = byte;
+			break;
+		case LEG_2_CS:
+		case LEG_2_SS:
+		case LEG_2_DS:
+		case LEG_2_ES:
+		case LEG_2_FS:
+		case LEG_2_GS:
+			prefix->pfx_group2 = byte;
+			break;
+		case LEG_3_OPSZ:
+			prefix->pfx_group3 = byte;
+			break;
+		case LEG_4_ADDRSZ:
+			prefix->pfx_group4 = byte;
+			break;
+		case REX_BASE...REX_BASE + 0x0F:
+			if (insn->insn_cpu_mode == VMM_CPU_MODE_LONG)
+				prefix->pfx_rex = byte;
+			else /* INC encountered */
+				return (DECODE_ERROR);
+			break;
+		case VEX_2_BYTE:
+		case VEX_3_BYTE:
+			log_warnx("%s: VEX not supported", __func__);
+			return (DECODE_ERROR);
+		default:
+			/* Something other than a valid prefix. */
+			return (DECODE_MORE);
+		}
+		/* Advance our position past the prefix we just recorded. */
+		next_byte(state, NULL);
+	}
+
+	return (res);
+}
+
+/*
+ * decode_modrm
+ *
+ * Fetch the ModR/M byte if the instruction's operand encoding calls for one.
+ * For MR, RM and MI encodings the byte is consumed and stored in the
+ * instruction. I and OI encodings are rejected, as such instructions need no
+ * memory assist. For any other encoding nothing is consumed and the stream
+ * is only peeked at to report whether bytes remain.
+ *
+ * Return values:
+ *  DECODE_DONE: success and no more bytes follow
+ *  DECODE_MORE: success and more bytes follow
+ *  DECODE_ERROR: invalid arguments or state, an I/OI encoding, or no byte
+ *      was available; insn_modrm_valid is left at 0
+ */
+static enum decode_result
+decode_modrm(struct x86_decode_state *state, struct x86_insn *insn)
+{
+	enum decode_result res;
+	uint8_t byte;
+
+	if (!is_valid_state(state, __func__) || insn == NULL)
+		return (DECODE_ERROR);
+
+	insn->insn_modrm_valid = 0;
+
+	/* Check the operand encoding to see if we fetch a byte or abort. */
+	switch (insn->insn_opcode.op_encoding) {
+	case OP_ENC_MR:
+	case OP_ENC_RM:
+	case OP_ENC_MI:
+		res = next_byte(state, &byte);
+		if (res == DECODE_ERROR) {
+			/* byte was never written; do not record garbage. */
+			log_warnx("%s: failed to get modrm byte", __func__);
+			break;
+		}
+		insn->insn_modrm = byte;
+		insn->insn_modrm_valid = 1;
+		break;
+
+	case OP_ENC_I:
+	case OP_ENC_OI:
+		log_warnx("%s: instruction does not need memory assist",
+		    __func__);
+		res = DECODE_ERROR;
+		break;
+
+	default:
+		/* Peek to see if we're done decode. */
+		res = peek_byte(state, NULL);
+	}
+
+	return (res);
+}
+
+/*
+ * get_modrm_reg
+ *
+ * Translate the reg/opcode field of a valid ModR/M byte into a VCPU_REGS_*
+ * index stored in insn->insn_reg, then apply the REX.R extension.
+ *
+ * Returns 0 on success or -1 if insn is NULL.
+ */
+static int
+get_modrm_reg(struct x86_insn *insn)
+{
+	/* Indexed by the 3-bit reg/opcode field. */
+	static const int regop_to_gpr[8] = {
+		VCPU_REGS_RAX, VCPU_REGS_RCX, VCPU_REGS_RDX, VCPU_REGS_RBX,
+		VCPU_REGS_RSP, VCPU_REGS_RBP, VCPU_REGS_RSI, VCPU_REGS_RDI,
+	};
+
+	if (insn == NULL)
+		return (-1);
+
+	if (insn->insn_modrm_valid)
+		insn->insn_reg = regop_to_gpr[MODRM_REGOP(insn->insn_modrm)];
+
+	/* REX R bit selects extended registers in LONG mode. */
+	if (insn->insn_prefix.pfx_rex & REX_R)
+		insn->insn_reg += 8;
+
+	return (0);
+}
+
+/*
+ * get_modrm_addr
+ *
+ * Derive a guest virtual address from the r/m field of a valid ModR/M byte
+ * by reading the matching general purpose register, and store it in
+ * insn->insn_gva. The address is left at 0 for r/m 4 unless mod is 3, and
+ * for r/m 5 when mod is 0. Nothing is stored if no ModR/M byte was decoded.
+ *
+ * Returns 0 on success or -1 if insn or vrs is NULL.
+ */
+static int
+get_modrm_addr(struct x86_insn *insn, struct vcpu_reg_state *vrs)
+{
+	/* Indexed by the 3-bit r/m field. */
+	static const int rm_to_gpr[8] = {
+		VCPU_REGS_RAX, VCPU_REGS_RCX, VCPU_REGS_RDX, VCPU_REGS_RBX,
+		VCPU_REGS_RSP, VCPU_REGS_RBP, VCPU_REGS_RSI, VCPU_REGS_RDI,
+	};
+	uint8_t mod, rm;
+	vaddr_t addr;
+
+	if (insn == NULL || vrs == NULL)
+		return (-1);
+
+	if (!insn->insn_modrm_valid)
+		return (0);
+
+	rm = MODRM_RM(insn->insn_modrm);
+	mod = MODRM_MOD(insn->insn_modrm);
+
+	if ((rm == 0x4 && mod != 0x3) || (rm == 0x5 && mod == 0x0))
+		addr = 0x0UL;
+	else
+		addr = vrs->vrs_gprs[rm_to_gpr[rm]];
+
+	insn->insn_gva = addr;
+
+	return (0);
+}
+
+/*
+ * decode_disp
+ *
+ * Read the displacement bytes selected by the mod field of the ModR/M byte:
+ * none for mod 0 and mod 3, one byte for mod 1, and for mod 2 either two
+ * bytes (address size override prefix present) or four bytes.
+ *
+ * Return values:
+ *  DECODE_MORE: no displacement was needed, or one was read and bytes remain
+ *  DECODE_DONE: a displacement was read and the stream is exhausted
+ *  DECODE_ERROR: invalid arguments or state, no ModR/M byte was decoded, or
+ *      the stream ran out of bytes
+ */
+static enum decode_result
+decode_disp(struct x86_decode_state *state, struct x86_insn *insn)
+{
+	enum decode_result res;
+	uint64_t disp = 0;
+	size_t nbytes;
+
+	if (!is_valid_state(state, __func__) || insn == NULL)
+		return (DECODE_ERROR);
+
+	if (!insn->insn_modrm_valid)
+		return (DECODE_ERROR);
+
+	switch (MODRM_MOD(insn->insn_modrm)) {
+	case 0x00:
+		insn->insn_disp_type = DISP_0;
+		return (DECODE_MORE);
+	case 0x01:
+		insn->insn_disp_type = DISP_1;
+		nbytes = 1;
+		break;
+	case 0x02:
+		if (insn->insn_prefix.pfx_group4 == LEG_4_ADDRSZ) {
+			insn->insn_disp_type = DISP_2;
+			nbytes = 2;
+		} else {
+			insn->insn_disp_type = DISP_4;
+			nbytes = 4;
+		}
+		break;
+	default:
+		insn->insn_disp_type = DISP_NONE;
+		return (DECODE_MORE);
+	}
+
+	res = next_value(state, nbytes, &disp);
+	if (res == DECODE_ERROR)
+		return (res);
+
+	insn->insn_disp = disp;
+	return (res);
+}
+
+/*
+ * decode_opcode
+ *
+ * Consume the one or two opcode bytes at the current stream position, look
+ * them up in the opcode and operand encoding tables and record the result in
+ * insn->insn_opcode.
+ *
+ * Return values:
+ *  DECODE_DONE: opcode decoded and no more bytes follow
+ *  DECODE_MORE: opcode decoded and more bytes follow
+ *  DECODE_ERROR: invalid arguments or state, the stream ran out of bytes, or
+ *      the opcode or its operand encoding is unknown or unsupported
+ */
+static enum decode_result
+decode_opcode(struct x86_decode_state *state, struct x86_insn *insn)
+{
+	enum decode_result res;
+	enum x86_opcode_type type;
+	enum x86_operand_enc enc;
+	struct x86_opcode *opcode;
+	uint8_t byte, byte2;
+
+	/* Callers compare against DECODE_ERROR, so never return a bare -1. */
+	if (!is_valid_state(state, __func__) || insn == NULL)
+		return (DECODE_ERROR);
+
+	/* Only touch insn once it is known to be non-NULL. */
+	opcode = &insn->insn_opcode;
+	memset(opcode, 0, sizeof(*opcode));
+
+	res = next_byte(state, &byte);
+	if (res == DECODE_ERROR)
+		return (res);
+
+	type = x86_1byte_opcode_tbl[byte];
+	switch (type) {
+	case OP_UNKNOWN:
+	case OP_UNSUPPORTED:
+		log_warnx("%s: unsupported opcode", __func__);
+		return (DECODE_ERROR);
+
+	case OP_TWO_BYTE:
+		res = next_byte(state, &byte2);
+		if (res == DECODE_ERROR)
+			return (res);
+
+		type = x86_2byte_opcode_tbl[byte2];
+		if (type == OP_UNKNOWN || type == OP_UNSUPPORTED) {
+			log_warnx("%s: unsupported 2-byte opcode", __func__);
+			return (DECODE_ERROR);
+		}
+
+		opcode->op_bytes[0] = byte;
+		opcode->op_bytes[1] = byte2;
+		opcode->op_bytes_len = 2;
+		enc = x86_2byte_operand_enc_table[byte2];
+		break;
+
+	default:
+		/* We've potentially got a known 1-byte opcode. */
+		opcode->op_bytes[0] = byte;
+		opcode->op_bytes_len = 1;
+		enc = x86_1byte_operand_enc_tbl[byte];
+	}
+
+	if (enc == OP_ENC_UNKNOWN)
+		return (DECODE_ERROR);
+
+	opcode->op_type = type;
+	opcode->op_encoding = enc;
+
+	return (res);
+}
+
+/*
+ * decode_sib
+ *
+ * Consume the optional SIB byte. A SIB byte is read only when a ModR/M byte
+ * was decoded and its r/m field is 4; otherwise nothing is consumed.
+ *
+ * Return values:
+ *  DECODE_MORE: no SIB byte was needed, or one was read and bytes remain
+ *  DECODE_DONE: a SIB byte was read and the stream is exhausted
+ *  DECODE_ERROR: invalid arguments or state, or no byte was available
+ */
+static enum decode_result
+decode_sib(struct x86_decode_state *state, struct x86_insn *insn)
+{
+	enum decode_result res;
+	uint8_t byte;
+
+	/* Callers compare against DECODE_ERROR, so never return a bare -1. */
+	if (!is_valid_state(state, __func__) || insn == NULL)
+		return (DECODE_ERROR);
+
+	/* SIB is optional, so assume we will be continuing. */
+	res = DECODE_MORE;
+
+	insn->insn_sib_valid = 0;
+	if (!insn->insn_modrm_valid)
+		return (res);
+
+	/* XXX is SIB valid in all cpu modes? */
+	if (MODRM_RM(insn->insn_modrm) == 0x4) {
+		res = next_byte(state, &byte);
+		if (res != DECODE_ERROR) {
+			insn->insn_sib_valid = 1;
+			insn->insn_sib = byte;
+		}
+	}
+
+	return (res);
+}
+
+/*
+ * decode_imm
+ *
+ * Read the immediate operand of an MI encoded instruction. Instructions with
+ * any other operand encoding are reported as DECODE_DONE without consuming
+ * anything. The immediate width depends on the opcode, the cpu mode and the
+ * presence of a REX prefix.
+ *
+ * Return values:
+ *  DECODE_DONE: nothing to read, or the immediate was read and the stream is
+ *      exhausted
+ *  DECODE_MORE: the immediate was read and bytes remain
+ *  DECODE_ERROR: invalid arguments or state, an unexpected MOV opcode, or the
+ *      stream ran out of bytes
+ */
+static enum decode_result
+decode_imm(struct x86_decode_state *state, struct x86_insn *insn)
+{
+	enum decode_result res;
+	size_t num_bytes;
+	uint64_t value;
+	uint8_t op;
+	int real_mode;
+
+	if (!is_valid_state(state, __func__) || insn == NULL)
+		return (DECODE_ERROR);
+
+	/* Only handle MI encoded instructions. Others shouldn't need assist. */
+	if (insn->insn_opcode.op_encoding != OP_ENC_MI)
+		return (DECODE_DONE);
+
+	real_mode = (insn->insn_cpu_mode == VMM_CPU_MODE_REAL);
+
+	if (insn->insn_opcode.op_type == OP_MOV) {
+		/* Exceptions related to MOV instructions. */
+		op = insn->insn_opcode.op_bytes[0];
+		if (op == 0xC6) {
+			num_bytes = 1;
+		} else if (op == 0xC7) {
+			num_bytes = real_mode ? 2 : 4;
+		} else {
+			log_warnx("%s: cannot decode immediate bytes for MOV",
+			    __func__);
+			return (DECODE_ERROR);
+		}
+	} else if (real_mode) {
+		/* Fallback to interpreting based on cpu mode and REX. */
+		num_bytes = 2;
+	} else if (insn->insn_prefix.pfx_rex == REX_NONE) {
+		num_bytes = 4;
+	} else {
+		num_bytes = 8;
+	}
+
+	res = next_value(state, num_bytes, &value);
+	if (res == DECODE_ERROR)
+		return (res);
+
+	insn->insn_immediate = value;
+	insn->insn_immediate_len = num_bytes;
+
+	return (res);
+}
+
+
+/*
+ * insn_decode
+ *
+ * Decode an x86 instruction from the instruction bytes captured in the vm
+ * exit information. Decoding proceeds through prefixes, opcode, ModR/M, SIB,
+ * displacement and immediate, stopping early once the bytes are exhausted.
+ * On success insn->insn_bytes_len holds the number of bytes consumed.
+ *
+ * Parameters:
+ *  exit: vm exit information holding the instruction bytes and vcpu registers
+ *  insn: instruction object to fill in; it is zeroed first
+ *
+ * Return values:
+ *  0: successful decode
+ *  Non-zero: an exception occurred during decode
+ */
+int
+insn_decode(struct vm_exit *exit, struct x86_insn *insn)
+{
+	enum decode_result res;
+	struct vcpu_reg_state *vrs;
+	struct x86_decode_state state;
+	uint8_t *bytes, len;
+	int mode;
+
+	/* DECODE_ERROR is 0, which means success here; report -1 instead. */
+	if (exit == NULL || insn == NULL) {
+		log_warnx("%s: invalid input", __func__);
+		return (-1);
+	}
+
+	vrs = &exit->vrs;
+	bytes = exit->vee.vee_insn_bytes;
+	len = exit->vee.vee_insn_len;
+
+	/* 0. Initialize state and instruction objects. */
+	memset(insn, 0, sizeof(*insn));
+	memset(&state, 0, sizeof(state));
+
+	/* Never copy more than the decode buffer can hold. */
+	if (len == 0 || len > sizeof(state.s_bytes)) {
+		log_warnx("%s: invalid instruction length %u", __func__, len);
+		return (-1);
+	}
+	state.s_len = len;
+	memcpy(state.s_bytes, bytes, len);
+
+	/* 1. Detect CPU mode. */
+	mode = detect_cpu_mode(vrs);
+	if (mode == VMM_CPU_MODE_UNKNOWN) {
+		log_warnx("%s: failed to identify cpu mode", __func__);
+#ifdef MMIO_DEBUG
+		dump_regs(vrs);
+#endif
+		return (-1);
+	}
+	insn->insn_cpu_mode = mode;
+
+#ifdef MMIO_DEBUG
+	log_info("%s: cpu mode %s detected", __progname, str_cpu_mode(mode));
+	printf("%s: got bytes: [ ", __progname);
+	for (int i = 0; i < len; i++) {
+		printf("%02x ", bytes[i]);
+	}
+	printf("]\n");
+#endif
+	/* 2. Decode prefixes. */
+	res = decode_prefix(&state, insn);
+	if (res == DECODE_ERROR) {
+		log_warnx("%s: error decoding prefixes", __func__);
+		goto err;
+	} else if (res == DECODE_DONE)
+		goto done;
+
+#ifdef MMIO_DEBUG
+	log_info("%s: prefixes {g1: 0x%02x, g2: 0x%02x, g3: 0x%02x, g4: 0x%02x,"
+	    " rex: 0x%02x }", __progname, insn->insn_prefix.pfx_group1,
+	    insn->insn_prefix.pfx_group2, insn->insn_prefix.pfx_group3,
+	    insn->insn_prefix.pfx_group4, insn->insn_prefix.pfx_rex);
+#endif
+
+	/* 3. Pick apart opcode. Here we can start short-circuiting. */
+	res = decode_opcode(&state, insn);
+	if (res == DECODE_ERROR) {
+		log_warnx("%s: error decoding opcode", __func__);
+		goto err;
+	} else if (res == DECODE_DONE)
+		goto done;
+
+#ifdef MMIO_DEBUG
+	log_info("%s: found opcode %s (operand encoding %s) (%s)", __progname,
+	    str_opcode(&insn->insn_opcode), str_operand_enc(&insn->insn_opcode),
+	    str_decode_res(res));
+#endif
+
+	/* Process optional ModR/M byte. */
+	res = decode_modrm(&state, insn);
+	if (res == DECODE_ERROR) {
+		log_warnx("%s: error decoding modrm", __func__);
+		goto err;
+	}
+	if (get_modrm_addr(insn, vrs) != 0)
+		goto err;
+	if (get_modrm_reg(insn) != 0)
+		goto err;
+	if (res == DECODE_DONE)
+		goto done;
+
+#ifdef MMIO_DEBUG
+	if (insn->insn_modrm_valid)
+		log_info("%s: found ModRM 0x%02x (%s)", __progname,
+		    insn->insn_modrm, str_decode_res(res));
+#endif
+
+	/* Process optional SIB byte. */
+	res = decode_sib(&state, insn);
+	if (res == DECODE_ERROR) {
+		log_warnx("%s: error decoding sib", __func__);
+		goto err;
+	} else if (res == DECODE_DONE)
+		goto done;
+
+#ifdef MMIO_DEBUG
+	if (insn->insn_sib_valid)
+		log_info("%s: found SIB 0x%02x (%s)", __progname,
+		    insn->insn_sib, str_decode_res(res));
+#endif
+
+	/* Process any Displacement bytes. */
+	res = decode_disp(&state, insn);
+	if (res == DECODE_ERROR) {
+		log_warnx("%s: error decoding displacement", __func__);
+		goto err;
+	} else if (res == DECODE_DONE)
+		goto done;
+
+	/* Process any Immediate data bytes. */
+	res = decode_imm(&state, insn);
+	if (res == DECODE_ERROR) {
+		log_warnx("%s: error decoding immediate bytes", __func__);
+		goto err;
+	}
+
+done:
+	/* The number of bytes consumed is the instruction's length. */
+	insn->insn_bytes_len = state.s_idx;
+
+#ifdef MMIO_DEBUG
+	log_info("%s: final instruction length is %u", __func__,
+	    insn->insn_bytes_len);
+	dump_insn(insn);
+	log_info("%s: modrm: {mod: %d, regop: %d, rm: %d}", __func__,
+	    MODRM_MOD(insn->insn_modrm), MODRM_REGOP(insn->insn_modrm),
+	    MODRM_RM(insn->insn_modrm));
+	dump_regs(vrs);
+#endif /* MMIO_DEBUG */
+	return (0);
+
+err:
+#ifdef MMIO_DEBUG
+	dump_insn(insn);
+	log_info("%s: modrm: {mod: %d, regop: %d, rm: %d}", __func__,
+	    MODRM_MOD(insn->insn_modrm), MODRM_REGOP(insn->insn_modrm),
+	    MODRM_RM(insn->insn_modrm));
+	dump_regs(vrs);
+#endif /* MMIO_DEBUG */
+	return (-1);
+}
+
+/*
+ * emulate_mov
+ *
+ * Emulate a MOV that reads from memory into a register (RM encoding). With
+ * no device emulation in place, the destination register is filled with all
+ * ones.
+ *
+ * Returns 0 on success or -1 for any other operand encoding.
+ */
+static int
+emulate_mov(struct x86_insn *insn, struct vm_exit *exit)
+{
+	/* XXX Only supports read to register for now */
+	if (insn->insn_opcode.op_encoding == OP_ENC_RM) {
+		/* XXX No device emulation yet. Fill with 0xFFs. */
+		exit->vrs.vrs_gprs[insn->insn_reg] = 0xFFFFFFFFFFFFFFFF;
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * emulate_movzx
+ *
+ * Emulate MOVZX (0F B6 / 0F B7): zero-extend a 1 or 2 byte source read from
+ * memory into the destination register. With no device emulation in place,
+ * the source value is all ones.
+ *
+ * The destination is cleared following the x86 rules for partial register
+ * writes: a 16-bit write leaves the bits above bit 15 untouched, while a
+ * 32-bit write is zero-extended through the full 64-bit register.
+ *
+ * Returns 0 on success or -1 if the instruction is not a valid MOVZX.
+ */
+static int
+emulate_movzx(struct x86_insn *insn, struct vm_exit *exit)
+{
+	uint8_t byte, len, src = 1, dst = 2;
+	uint64_t value = 0;
+
+	/* Only RM is valid for MOVZX. */
+	if (insn->insn_opcode.op_encoding != OP_ENC_RM) {
+		log_warnx("invalid op encoding for MOVZX: %d",
+		    insn->insn_opcode.op_encoding);
+		return (-1);
+	}
+
+	len = insn->insn_opcode.op_bytes_len;
+	if (len < 1 || len > sizeof(insn->insn_opcode.op_bytes)) {
+		log_warnx("invalid opcode byte length: %d", len);
+		return (-1);
+	}
+
+	/* The last opcode byte selects the source width. */
+	byte = insn->insn_opcode.op_bytes[len - 1];
+	switch (byte) {
+	case 0xB6:
+		src = 1;
+		if (insn->insn_cpu_mode == VMM_CPU_MODE_PROT
+		    || insn->insn_cpu_mode == VMM_CPU_MODE_REAL)
+			dst = 2;
+		else if (insn->insn_prefix.pfx_rex == REX_NONE)
+			dst = 4;
+		else // XXX validate CPU mode
+			dst = 8;
+		break;
+	case 0xB7:
+		src = 2;
+		if (insn->insn_prefix.pfx_rex == REX_NONE)
+			dst = 4;
+		else // XXX validate CPU mode
+			dst = 8;
+		break;
+	default:
+		log_warnx("invalid byte in MOVZX opcode: %x", byte);
+		return (-1);
+	}
+
+	if (dst == 2)
+		exit->vrs.vrs_gprs[insn->insn_reg] &= ~0xFFFFULL;
+	else
+		exit->vrs.vrs_gprs[insn->insn_reg] = 0x0UL;
+
+	/* XXX No device emulation yet. Fill with 0xFFs. */
+	switch (src) {
+	case 1: value = 0xFF; break;
+	case 2: value = 0xFFFF; break;
+	case 4: value = 0xFFFFFFFF; break;
+	case 8: value = 0xFFFFFFFFFFFFFFFF; break;
+	default:
+		log_warnx("invalid source size: %d", src);
+		return (-1);
+	}
+
+	exit->vrs.vrs_gprs[insn->insn_reg] |= value;
+
+	return (0);
+}
+
+/*
+ * insn_emulate
+ *
+ * Emulate a previously decoded instruction against the vcpu register state
+ * in the vm exit. On success the guest RIP is advanced past the instruction.
+ *
+ * Returns:
+ *  0: success
+ *  ENOTSUP: an unsupported instruction was provided
+ *  other non-zero: the value returned by the opcode's emulation routine
+ */
+int
+insn_emulate(struct vm_exit *exit, struct x86_insn *insn)
+{
+	const enum x86_opcode_type type = insn->insn_opcode.op_type;
+	int res;
+
+	if (type == OP_MOV) {
+		res = emulate_mov(insn, exit);
+	} else if (type == OP_MOVZX) {
+		res = emulate_movzx(insn, exit);
+	} else {
+		log_warnx("%s: emulation not defined for %s", __func__,
+		    str_opcode(&insn->insn_opcode));
+		res = ENOTSUP;
+	}
+
+	if (res != 0)
+		return (res);
+
+	/* Step the guest over the instruction we just emulated. */
+	exit->vrs.vrs_gprs[VCPU_REGS_RIP] += insn->insn_bytes_len;
+
+	return (0);
+}
diff --git a/usr.sbin/vmd/mmio.h b/usr.sbin/vmd/mmio.h
new file mode 100644
index 00000000000..8acf97a8487
--- /dev/null
+++ b/usr.sbin/vmd/mmio.h
@@ -0,0 +1,138 @@
+/* $OpenBSD: mmio.h,v 1.1 2022/11/10 11:46:39 dv Exp $ */
+
+/*
+ * Copyright (c) 2022 Dave Voutila <dv@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _MMIO_H_
+#define _MMIO_H_
+
+#include <sys/types.h>
+#include <machine/vmmvar.h>
+
+/* Code segment bits */
+#define CS_L (1 << 13)
+#define CS_D (1 << 14)
+
+#define EFLAGS_VM (1 << 17) /* Virtual 8086 Mode enabled */
+
+/* Instruction Prefixes (SDM Vol 2, 2.1.1) */
+#define LEG_1_LOCK 0xF0
+#define LEG_1_REPNE 0xF2
+#define LEG_1_REP 0xF3
+#define LEG_2_CS 0x2E
+#define LEG_2_SS 0x36
+#define LEG_2_DS 0x3E
+#define LEG_2_ES 0x26
+#define LEG_2_FS 0x64
+#define LEG_2_GS 0x65
+#define LEG_3_OPSZ 0x66 /* Operand size override */
+#define LEG_4_ADDRSZ 0x67 /* Address size override */
+
+/* REX prefix bit fields */
+#define REX_B 0x01
+#define REX_X 0x02
+#define REX_R 0x04
+#define REX_W 0x08
+#define REX_BASE 0x40
+
+#define REX_NONE 0x00
+
+/* VEX prefixes (unsupported) */
+#define VEX_2_BYTE 0xC5
+#define VEX_3_BYTE 0xC4
+
+#define ESCAPE 0x0F
+
+struct x86_prefix {
+ uint8_t pfx_group1; /* LOCK, REP, or REPNE */
+ uint8_t pfx_group2; /* Segment overrides */
+ uint8_t pfx_group3; /* Operand size override */
+ uint8_t pfx_group4; /* Address size override */
+ uint8_t pfx_rex; /* REX prefix for long mode */
+};
+
+enum x86_opcode_type {
+ OP_UNKNOWN = 0, /* Default value when undecoded. */
+ OP_IN,
+ OP_INS,
+ OP_MOV,
+ OP_MOVZX,
+ OP_OUT,
+ OP_OUTS,
+ OP_TWO_BYTE, /* Opcode is two bytes, not one. */
+ OP_UNSUPPORTED, /* Valid decode, but no current support. */
+};
+
+/* Instruction Operand Encoding as described in the SDM Vol 2, Ch 3-5. */
+enum x86_operand_enc {
+ OP_ENC_UNKNOWN = 0,
+ OP_ENC_I, /* Only immediate operand */
+ OP_ENC_MI, /* Immediate to ModRM */
+ OP_ENC_MR, /* Register to ModRM */
+ OP_ENC_RM, /* ModRm to Register */
+ OP_ENC_FD, /* Value @ segment offset to RAX */
+ OP_ENC_TD, /* RAX to segment offset */
+ OP_ENC_OI, /* Immediate to Register (no emul. needed!) */
+ OP_ENC_ZO, /* No ModRM byte. */
+};
+
+/*
+ * Displacement bytes. The type is chosen by decode_disp() in mmio.c from the
+ * mod field of the ModR/M byte.
+ */
+enum x86_disp_type {
+ DISP_NONE = 0, /* mod 3; also the value before any decode */
+ DISP_0, /* mod 0: no displacement bytes are read */
+ DISP_1, /* mod 1: one displacement byte */
+ DISP_2, /* Requires Legacy prefix LEG_4_ADDRSZ */
+ DISP_4, /* mod 2 without LEG_4_ADDRSZ: four bytes */
+};
+
+struct x86_opcode {
+ uint8_t op_bytes[2]; /* VEX unsupported */
+ uint8_t op_bytes_len; /* Length of opcode */
+ enum x86_opcode_type op_type; /* Type of opcode */
+ enum x86_operand_enc op_encoding; /* Operand encoding */
+};
+
+/*
+ * A decoded x86 instruction, filled in step by step by insn_decode().
+ *
+ * The MODRM_* macros extract the three fields of a ModR/M byte. Their
+ * argument is parenthesized so that any expression may be passed safely.
+ */
+struct x86_insn {
+	uint8_t			insn_bytes[15];	/* Original payload */
+	uint8_t			insn_bytes_len;	/* Size of payload */
+	int			insn_cpu_mode;	/* CPU mode */
+
+	struct x86_prefix	insn_prefix;	/* Combined prefixes */
+	struct x86_opcode	insn_opcode;
+
+	uint8_t			insn_modrm;	/* ModR/M */
+#define MODRM_MOD(x)	(((x) >> 6) & 0x3)
+#define MODRM_REGOP(x)	(((x) >> 3) & 0x7)
+#define MODRM_RM(x)	(((x) >> 0) & 0x7)
+	uint8_t			insn_modrm_valid; /* Is ModR/M set? */
+
+	vaddr_t			insn_gva;	/* Guest Virtual Addr */
+	int			insn_reg;	/* Register */
+
+	uint8_t			insn_sib;	/* Scale-Index-Base */
+	uint8_t			insn_sib_valid;	/* SIB byte set? */
+
+	uint64_t		insn_disp;	/* Displacement */
+	enum x86_disp_type	insn_disp_type;
+
+	uint64_t		insn_immediate;	/* Immediate data */
+	uint8_t			insn_immediate_len;
+};
+
+int insn_decode(struct vm_exit *, struct x86_insn *);
+int insn_emulate(struct vm_exit *, struct x86_insn *);
+
+#endif /* _MMIO_H_ */
diff --git a/usr.sbin/vmd/vm.c b/usr.sbin/vmd/vm.c
index 8efbdaadb32..763e7be8331 100644
--- a/usr.sbin/vmd/vm.c
+++ b/usr.sbin/vmd/vm.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vm.c,v 1.73 2022/09/01 22:01:40 dv Exp $ */
+/* $OpenBSD: vm.c,v 1.74 2022/11/10 11:46:39 dv Exp $ */
/*
* Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
@@ -59,6 +59,7 @@
#include "i8259.h"
#include "loadfile.h"
#include "mc146818.h"
+#include "mmio.h"
#include "ns8250.h"
#include "pci.h"
#include "virtio.h"
@@ -68,6 +69,8 @@
#define MB(x) (x * 1024UL * 1024UL)
#define GB(x) (x * 1024UL * 1024UL * 1024UL)
+#define MMIO_NOTYET 0 /* Non-zero compiles in the "#if MMIO_NOTYET" mmio paths below */
+
io_fn_t ioports_map[MAX_PORTS];
int run_vm(int, int[][VM_MAX_BASE_PER_DISK], int *,
@@ -1633,6 +1636,24 @@ vcpu_exit_inout(struct vm_run_params *vrp)
struct vm_exit *vei = vrp->vrp_exit;
uint8_t intr = 0xFF;
+ if (vei->vei.vei_rep || vei->vei.vei_string) {
+#ifdef MMIO_DEBUG
+ log_info("%s: %s%s%s %d-byte, enc=%d, data=0x%08x, port=0x%04x",
+ __func__,
+ vei->vei.vei_rep == 0 ? "" : "REP ",
+ vei->vei.vei_dir == VEI_DIR_IN ? "IN" : "OUT",
+ vei->vei.vei_string == 0 ? "" : "S",
+ vei->vei.vei_size, vei->vei.vei_encoding,
+ vei->vei.vei_data, vei->vei.vei_port);
+ log_info("%s: ECX = 0x%llx, RDX = 0x%llx, RSI = 0x%llx",
+ __func__,
+ vei->vrs.vrs_gprs[VCPU_REGS_RCX],
+ vei->vrs.vrs_gprs[VCPU_REGS_RDX],
+ vei->vrs.vrs_gprs[VCPU_REGS_RSI]);
+#endif /* MMIO_DEBUG */
+ fatalx("%s: can't emulate rep refix'd IN(s)/OUT(s)", __func__);
+ }
+
if (ioports_map[vei->vei.vei_port] != NULL)
intr = ioports_map[vei->vei.vei_port](vrp);
else if (vei->vei.vei_dir == VEI_DIR_IN)
@@ -1657,27 +1678,72 @@ vcpu_exit_inout(struct vm_run_params *vrp)
int
vcpu_exit_eptviolation(struct vm_run_params *vrp)
{
- int ret = 0;
- uint8_t fault_type;
struct vm_exit *ve = vrp->vrp_exit;
-
- fault_type = ve->vee.vee_fault_type;
- switch (fault_type) {
+ int ret = 0;
+#if MMIO_NOTYET
+ struct x86_insn insn;
+ uint64_t va, pa;
+ size_t len = 15; /* Max instruction length in x86. */
+#endif /* MMIO_NOTYET */
+ switch (ve->vee.vee_fault_type) {
case VEE_FAULT_HANDLED:
log_debug("%s: fault already handled", __func__);
break;
+
+#if MMIO_NOTYET
case VEE_FAULT_MMIO_ASSIST:
- log_warnx("%s: mmio assist required: rip=0x%llx", __progname,
- ve->vrs.vrs_gprs[VCPU_REGS_RIP]);
- ret = EFAULT;
+ /* Intel VMX might give us the length of the instruction. */
+ if (ve->vee.vee_insn_info & VEE_LEN_VALID)
+ len = ve->vee.vee_insn_len;
+
+ if (len > 15)
+ fatalx("%s: invalid instruction length %lu", __func__,
+ len);
+
+ /* If we weren't given instruction bytes, we need to fetch. */
+ if (!(ve->vee.vee_insn_info & VEE_BYTES_VALID)) {
+ memset(ve->vee.vee_insn_bytes, 0,
+ sizeof(ve->vee.vee_insn_bytes));
+ va = ve->vrs.vrs_gprs[VCPU_REGS_RIP];
+
+ /* XXX Only support instructions that fit on 1 page. */
+ if ((va & PAGE_MASK) + len > PAGE_SIZE) {
+ log_warnx("%s: instruction might cross page "
+ "boundary", __func__);
+ ret = EINVAL;
+ break;
+ }
+
+ ret = translate_gva(ve, va, &pa, PROT_EXEC);
+ if (ret != 0) {
+ log_warnx("%s: failed gva translation",
+ __func__);
+ break;
+ }
+
+ ret = read_mem(pa, ve->vee.vee_insn_bytes, len);
+ if (ret != 0) {
+ log_warnx("%s: failed to fetch instruction "
+ "bytes from 0x%llx", __func__, pa);
+ break;
+ }
+ }
+
+ ret = insn_decode(ve, &insn);
+ if (ret == 0)
+ ret = insn_emulate(ve, &insn);
break;
+#endif /* MMIO_NOTYET */
+
case VEE_FAULT_PROTECT:
log_debug("%s: EPT Violation: rip=0x%llx", __progname,
ve->vrs.vrs_gprs[VCPU_REGS_RIP]);
ret = EFAULT;
break;
+
default:
- fatalx("%s: invalid fault_type %d", __progname, fault_type);
+ fatalx("%s: invalid fault_type %d", __progname,
+ ve->vee.vee_fault_type);
/* UNREACHED */
}
@@ -2113,10 +2179,11 @@ get_input_data(struct vm_exit *vei, uint32_t *data)
* Translates a guest virtual address to a guest physical address by walking
* the currently active page table (if needed).
*
- * Note - this function can possibly alter the supplied VCPU state.
- * Specifically, it may inject exceptions depending on the current VCPU
- * configuration, and may alter %cr2 on #PF. Consequently, this function
- * should only be used as part of instruction emulation.
+ * XXX ensure translate_gva updates the A bit in the PTE
+ * XXX ensure translate_gva respects segment base and limits in i386 mode
+ * XXX ensure translate_gva respects segment wraparound in i8086 mode
+ * XXX ensure translate_gva updates the A bit in the segment selector
+ * XXX ensure translate_gva respects CR4.LMSLE if available
*
* Parameters:
* exit: The VCPU this translation should be performed for (guest MMU settings
@@ -2221,7 +2288,7 @@ translate_gva(struct vm_exit* exit, uint64_t va, uint64_t* pa, int mode)
return (EIO);
}
- /* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
+ /* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
if (pte & PG_PS)
break;