untrusted comment: signature from openbsd 6.1 base secret key
RWQEQa33SgQSEk3iUQjZBwKN4yXDFvjYuePYUJ88IGAlHIYDhp9eYqQik/wsL4UjnTymi1j3Orp4BPFWWEU3y7O/IjBA1IGUZgg=

OpenBSD 6.1 errata 002, May 2, 2017:

vmm(4) mismanaged floating point contexts.

Apply by doing:
   signify -Vep /etc/signify/openbsd-61-base.pub -x 002_vmmfpu.patch.sig \
       -m - | (cd /usr/src && patch -p0)

And then rebuild and install a new kernel:
   KK=`sysctl -n kern.osversion | cut -d# -f1`
   cd /usr/src/sys/arch/`machine`/compile/$KK
   make obj
   make config
   make
   make install

Index: sys/arch/amd64/amd64/fpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/fpu.c,v
--- sys/arch/amd64/amd64/fpu.c  21 Apr 2016 22:08:27 -0000      1.33
+++ sys/arch/amd64/amd64/fpu.c  27 Apr 2017 06:16:39 -0000      1.34
@@ -74,41 +74,10 @@
 * state is saved.
 */

-#define        fninit()                __asm("fninit")
-#define fwait()                        __asm("fwait")
-#define fnclex()               __asm("fnclex")
-#define        fxsave(addr)            __asm("fxsave %0" : "=m" (*addr))
-#define        fxrstor(addr)           __asm("fxrstor %0" : : "m" (*addr))
-#define        ldmxcsr(addr)           __asm("ldmxcsr %0" : : "m" (*addr))
-#define fldcw(addr)            __asm("fldcw %0" : : "m" (*addr))
-#define        clts()                  __asm("clts")
-#define        stts()                  lcr0(rcr0() | CR0_TS)
-
/*
 * The mask of enabled XSAVE features.
 */
uint64_t       xsave_mask;
-
-static inline void
-xsave(struct savefpu *addr, uint64_t mask)
-{
-       uint32_t lo, hi;
-
-       lo = mask;
-       hi = mask >> 32;
-       __asm volatile("xsave %0" : "=m" (*addr) : "a" (lo), "d" (hi) :
-           "memory");
-}
-
-static inline void
-xrstor(struct savefpu *addr, uint64_t mask)
-{
-       uint32_t lo, hi;
-
-       lo = mask;
-       hi = mask >> 32;
-       __asm volatile("xrstor %0" : : "m" (*addr), "a" (lo), "d" (hi));
-}

void fpudna(struct cpu_info *);
static int x86fpflags_to_siginfo(u_int32_t);
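The macros and inline xsave()/xrstor() helpers removed from fpu.c above are not
dropped from the kernel; the sys/arch/amd64/include/fpu.h hunk near the end of
this patch re-adds them so that vmm.c can share them. A minimal sketch of the
save-path selection they provide (names taken from the patch; surrounding
locking elided):

    /*
     * A non-zero xsave_mask means the kernel enabled XSAVE; otherwise
     * fall back to FXSAVE, which only covers x87/SSE state.
     */
    if (xsave_mask)
            xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask);
    else
            fxsave(&p->p_addr->u_pcb.pcb_savefpu);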
Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
--- sys/arch/amd64/amd64/vmm.c  26 Apr 2017 09:53:28 -0000      1.133
+++ sys/arch/amd64/amd64/vmm.c  27 Apr 2017 06:16:39 -0000      1.134
@@ -23,6 +23,7 @@
#include <sys/device.h>
#include <sys/pool.h>
#include <sys/proc.h>
+#include <sys/user.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
@@ -31,6 +32,7 @@

#include <uvm/uvm_extern.h>

+#include <machine/fpu.h>
#include <machine/pmap.h>
#include <machine/biosvar.h>
#include <machine/segments.h>
@@ -145,6 +147,7 @@ int vcpu_vmx_check_cap(struct vcpu *, ui
int vcpu_vmx_compute_ctrl(uint64_t, uint16_t, uint32_t, uint32_t, uint32_t *);
int vmx_get_exit_info(uint64_t *, uint64_t *);
int vmx_handle_exit(struct vcpu *);
+int vmx_handle_xsetbv(struct vcpu *);
int vmm_handle_cpuid(struct vcpu *);
int vmx_handle_rdmsr(struct vcpu *);
int vmx_handle_wrmsr(struct vcpu *);
@@ -360,7 +363,7 @@ vmm_attach(struct device *parent, struct

       pool_init(&vm_pool, sizeof(struct vm), 0, IPL_NONE, PR_WAITOK,
           "vmpool", NULL);
-       pool_init(&vcpu_pool, sizeof(struct vcpu), 0, IPL_NONE, PR_WAITOK,
+       pool_init(&vcpu_pool, sizeof(struct vcpu), 64, IPL_NONE, PR_WAITOK,
           "vcpupl", NULL);

       vmm_softc = sc;
@@ -2373,6 +2376,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
       /* XXX CR0 shadow */
       /* XXX CR4 shadow */

+       /* xcr0 power on default sets bit 0 (x87 state) */
+       vcpu->vc_gueststate.vg_xcr0 = XCR0_X87;
+
       /* Flush the VMCS */
       if (vmclear(&vcpu->vc_control_pa)) {
               ret = EINVAL;
@@ -2498,7 +2504,7 @@ vcpu_init_vmx(struct vcpu *vcpu)
       }

       /* Host CR0 */
-       cr0 = rcr0();
+       cr0 = rcr0() & ~CR0_TS;
       if (vmwrite(VMCS_HOST_IA32_CR0, cr0)) {
               ret = EINVAL;
               goto exit;
@@ -3354,6 +3360,42 @@ vcpu_must_stop(struct vcpu *vcpu)
}

/*
+ * vmm_fpusave
+ *
+ * Modified version of fpusave_cpu from fpu.c that only saves the FPU context
+ * and does not call splipi/splx. Must be called with interrupts disabled.
+ */
+void
+vmm_fpusave(void)
+{
+       struct proc *p;
+       struct cpu_info *ci = curcpu();
+
+       p = ci->ci_fpcurproc;
+       if (p == NULL)
+               return;
+
+       if (ci->ci_fpsaving != 0)
+               panic("%s: recursive save!", __func__);
+       /*
+        * Set ci->ci_fpsaving, so that any pending exception will be
+        * thrown away.  (It will be caught again if/when the FPU
+        * state is restored.)
+        */
+       ci->ci_fpsaving = 1;
+       if (xsave_mask)
+               xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask);
+       else
+               fxsave(&p->p_addr->u_pcb.pcb_savefpu);
+       ci->ci_fpsaving = 0;
+
+       p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
+
+       p->p_addr->u_pcb.pcb_fpcpu = NULL;
+       ci->ci_fpcurproc = NULL;
+}
+
+/*
 * vcpu_run_vmx
 *
 * VMX main loop used to run a VCPU.
@@ -3404,6 +3446,8 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
                       break;
               case VMX_EXIT_CPUID:
                       break;
+               case VMX_EXIT_XSETBV:
+                       break;
#ifdef VMM_DEBUG
               case VMX_EXIT_TRIPLE_FAULT:
                       DPRINTF("%s: vm %d vcpu %d triple fault\n",
@@ -3528,10 +3572,76 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v

               /* Start / resume the VCPU */
               KERNEL_ASSERT_LOCKED();
+
+               /* Disable interrupts and save the current FPU state. */
+               disable_intr();
+               clts();
+               vmm_fpusave();
+
+               /* Initialize the guest FPU if not inited already */
+               if (!vcpu->vc_fpuinited) {
+                       fninit();
+                       bzero(&vcpu->vc_g_fpu.fp_fxsave,
+                           sizeof(vcpu->vc_g_fpu.fp_fxsave));
+                       vcpu->vc_g_fpu.fp_fxsave.fx_fcw =
+                           __INITIAL_NPXCW__;
+                       vcpu->vc_g_fpu.fp_fxsave.fx_mxcsr =
+                           __INITIAL_MXCSR__;
+                       fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+
+                       vcpu->vc_fpuinited = 1;
+               }
+
+               if (xsave_mask) {
+                       /* Restore guest XCR0 and FPU context */
+                       if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
+                               DPRINTF("%s: guest attempted to set invalid "
+                                   "bits in xcr0\n", __func__);
+                               ret = EINVAL;
+                               stts();
+                               enable_intr();
+                               break;
+                       }
+
+                       /* Restore guest %xcr0 */
+                       xrstor(&vcpu->vc_g_fpu, xsave_mask);
+                       xsetbv(0, vcpu->vc_gueststate.vg_xcr0);
+               } else
+                       fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+
               KERNEL_UNLOCK();
               ret = vmx_enter_guest(&vcpu->vc_control_pa,
                   &vcpu->vc_gueststate, resume);

+               /*
+                * On exit, interrupts are disabled, and we are running with
+                * the guest FPU state still possibly on the CPU. Save the FPU
+                * state before re-enabling interrupts.
+                */
+               if (xsave_mask) {
+                       /* Save guest %xcr0 */
+                       vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
+
+                       /* Restore host %xcr0 */
+                       xsetbv(0, xsave_mask);
+
+                       /*
+                        * Save full copy of FPU state - guest content is
+                        * always a subset of host's save area (see xsetbv
+                        * exit handler)
+                        */
+                       xsave(&vcpu->vc_g_fpu, xsave_mask);
+               } else
+                       fxsave(&vcpu->vc_g_fpu);
+
+               /*
+                * FPU state is invalid, set CR0_TS to force DNA trap on next
+                * access.
+                */
+               stts();
+
+               enable_intr();
+
               exit_reason = VM_EXIT_NONE;
               if (ret == 0) {
                       /*
@@ -3545,6 +3655,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
                               printf("%s: can't read guest rflags during "
                                   "exit\n", __func__);
                               ret = EINVAL;
+                               KERNEL_LOCK();
                               break;
                        }
               }
@@ -3826,6 +3937,10 @@ vmx_handle_exit(struct vcpu *vcpu)
               ret = vmx_handle_wrmsr(vcpu);
               update_rip = 1;
               break;
+       case VMX_EXIT_XSETBV:
+               ret = vmx_handle_xsetbv(vcpu);
+               update_rip = 1;
+               break;
       case VMX_EXIT_TRIPLE_FAULT:
#ifdef VMM_DEBUG
               DPRINTF("%s: vm %d vcpu %d triple fault\n", __func__,
@@ -4351,6 +4466,62 @@ vmx_handle_rdmsr(struct vcpu *vcpu)
}

/*
+ * vmx_handle_xsetbv
+ *
+ * Handler for xsetbv instructions. We allow the guest VM to set xcr0 values
+ * limited to the xsave_mask in use in the host.
+ *
+ * Parameters:
+ *  vcpu: vcpu structure containing instruction info causing the exit
+ *
+ * Return value:
+ *  0: The operation was successful
+ *  EINVAL: An error occurred
+ */
+int
+vmx_handle_xsetbv(struct vcpu *vcpu)
+{
+       uint64_t insn_length;
+       uint64_t *rax, *rdx, *rcx;
+
+       if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
+               printf("%s: can't obtain instruction length\n", __func__);
+               return (EINVAL);
+       }
+
+       /* All XSETBV instructions are 0x0F 0x01 0xD1 */
+       KASSERT(insn_length == 3);
+
+       rax = &vcpu->vc_gueststate.vg_rax;
+       rcx = &vcpu->vc_gueststate.vg_rcx;
+       rdx = &vcpu->vc_gueststate.vg_rdx;
+
+       if (*rcx != 0) {
+               DPRINTF("%s: guest specified invalid xcr register number "
+                   "%lld\n", __func__, *rcx);
+               /* XXX this should #GP(0) instead of killing the guest */
+               return (EINVAL);
+       }
+
+       /*
+        * No bits in %edx are currently supported. Check this, and validate
+        * against the host mask.
+        */
+       if (*rdx != 0 || (*rax & ~xsave_mask)) {
+               DPRINTF("%s: guest specified invalid xcr0 content "
+                   "(0x%llx:0x%llx)\n", __func__, *rdx, *rax);
+               /* XXX this should #GP(0) instead of killing the guest */
+               return (EINVAL);
+       }
+
+       vcpu->vc_gueststate.vg_xcr0 = *rax;
+
+       vcpu->vc_gueststate.vg_rip += insn_length;
+
+       return (0);
+}
+
+/*
 * vmx_handle_wrmsr
 *
 * Handler for wrmsr instructions. This handler logs the access, and discards
@@ -4413,6 +4584,7 @@ vmm_handle_cpuid(struct vcpu *vcpu)
{
       uint64_t insn_length;
       uint64_t *rax, *rbx, *rcx, *rdx;
+       uint32_t eax, ebx, ecx, edx;

       if (vmm_softc->mode == VMM_MODE_VMX ||
           vmm_softc->mode == VMM_MODE_EPT) {
@@ -4431,9 +4603,11 @@ vmm_handle_cpuid(struct vcpu *vcpu)
       rcx = &vcpu->vc_gueststate.vg_rcx;
       rdx = &vcpu->vc_gueststate.vg_rdx;

+       CPUID_LEAF(*rax, 0, eax, ebx, ecx, edx);
+
       switch (*rax) {
       case 0x00:      /* Max level and vendor ID */
-               *rax = 0x07; /* cpuid_level */
+               *rax = 0x0d; /* cpuid_level */
               *rbx = *((uint32_t *)&cpu_vendor);
               *rdx = *((uint32_t *)&cpu_vendor + 1);
               *rcx = *((uint32_t *)&cpu_vendor + 2);
@@ -4580,13 +4754,19 @@ vmm_handle_cpuid(struct vcpu *vcpu)
               *rcx = 0;
               *rdx = 0;
               break;
-       case 0x0d:      /* Processor ext. state information (not supported) */
-               DPRINTF("%s: function 0x0d (ext. state info) not supported\n",
-                   __func__);
-               *rax = 0;
-               *rbx = 0;
-               *rcx = 0;
-               *rdx = 0;
+       case 0x0d:      /* Processor ext. state information */
+               if (*rcx == 0) {
+                       *rax = xsave_mask;
+                       *rbx = ebx;
+                       *rcx = ecx;
+                       *rdx = edx;
+               } else {
+                       CPUID_LEAF(*rax, *rcx, eax, ebx, ecx, edx);
+                       *rax = eax;
+                       *rbx = ebx;
+                       *rcx = ecx;
+                       *rdx = edx;
+               }
               break;
       case 0x0f:      /* QoS info (not supported) */
               DPRINTF("%s: function 0x0f (QoS info) not supported\n",
Index: sys/arch/amd64/amd64/vmm_support.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm_support.S,v
--- sys/arch/amd64/amd64/vmm_support.S  25 Mar 2017 15:25:20 -0000      1.7
+++ sys/arch/amd64/amd64/vmm_support.S  27 Apr 2017 06:16:39 -0000      1.8
@@ -17,6 +17,7 @@

#include "assym.h"
#include <machine/asm.h>
+#include <machine/psl.h>
#include <machine/specialreg.h>

/*
@@ -154,6 +155,9 @@ skip_init:
        */

       pushfq
+       popq    %rax
+       andq    $(~PSL_I), %rax
+       pushq   %rax

       /*
        * Save (possibly) lazy-switched selectors
@@ -354,7 +358,6 @@ restore_host:
        * first. This is to accommodate possibly lazy-switched
        * selectors from above
        */
-       cli
       popq    %rdx
       popq    %rax
       movq    $MSR_KERNELGSBASE, %rcx
@@ -371,7 +374,6 @@ restore_host:
       popq    %rax
       movq    $MSR_FSBASE, %rcx
       wrmsr
-       sti

       popw    %ax
       movw    %ax, %ss
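The vmm_support.S change pairs with the sequence above: the RFLAGS image
pushed before guest entry now has PSL_I cleared, and the cli/sti pair around
the host segment-register restore is removed, so vmx_enter_guest() returns
with interrupts still disabled. Sketch of the resulting contract in the caller
(names from the patch):

    disable_intr();
    /* ... load guest FPU state ... */
    ret = vmx_enter_guest(&vcpu->vc_control_pa, &vcpu->vc_gueststate, resume);
    /*
     * Interrupts are still off here: the final popfq on the exit path
     * restores an RFLAGS value whose PSL_I bit was masked off, giving the
     * caller a chance to save the guest FPU state before enable_intr().
     */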
Index: sys/arch/amd64/include/cpufunc.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpufunc.h,v
--- sys/arch/amd64/include/cpufunc.h    4 Sep 2016 09:22:28 -0000       1.13
+++ sys/arch/amd64/include/cpufunc.h    27 Apr 2017 06:16:39 -0000      1.14
@@ -333,6 +333,16 @@ xsetbv(uint32_t reg, uint64_t mask)
       lo = mask;
       hi = mask >> 32;
       __asm volatile("xsetbv" :: "c" (reg), "a" (lo), "d" (hi) : "memory");
+}
+
+static __inline uint64_t
+xgetbv(uint32_t reg)
+{
+       uint32_t lo, hi;
+
+       __asm volatile("xgetbv" : "=a" (lo), "=d" (hi) : "c" (reg));
+
+       return (((uint64_t)hi << 32) | (uint64_t)lo);
}

/* Break into DDB/KGDB. */
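xgetbv() is the read half of the existing xsetbv() wrapper: it executes XGETBV
and reassembles the 64-bit result from %edx:%eax. A hedged usage sketch
(XCR0_X87 is the only XCR0 constant this patch itself relies on):

    uint64_t xcr0;

    xcr0 = xgetbv(0);              /* %ecx = 0 selects XCR0 */
    if ((xcr0 & XCR0_X87) == 0)
            panic("x87 bit in XCR0 must always be set");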
Index: sys/arch/amd64/include/fpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/fpu.h,v
--- sys/arch/amd64/include/fpu.h        25 Mar 2015 21:05:18 -0000      1.11
+++ sys/arch/amd64/include/fpu.h        27 Apr 2017 06:16:39 -0000      1.12
@@ -70,6 +70,37 @@ void fpusave_proc(struct proc *, int);
void fpusave_cpu(struct cpu_info *, int);
void fpu_kernel_enter(void);
void fpu_kernel_exit(void);
+
+#define fninit()               __asm("fninit")
+#define fwait()                        __asm("fwait")
+#define fnclex()               __asm("fnclex")
+#define fxsave(addr)           __asm("fxsave %0" : "=m" (*addr))
+#define fxrstor(addr)          __asm("fxrstor %0" : : "m" (*addr))
+#define ldmxcsr(addr)          __asm("ldmxcsr %0" : : "m" (*addr))
+#define fldcw(addr)            __asm("fldcw %0" : : "m" (*addr))
+#define clts()                 __asm("clts")
+#define stts()                 lcr0(rcr0() | CR0_TS)
+
+static inline void
+xsave(struct savefpu *addr, uint64_t mask)
+{
+       uint32_t lo, hi;
+
+       lo = mask;
+       hi = mask >> 32;
+       __asm volatile("xsave %0" : "=m" (*addr) : "a" (lo), "d" (hi) :
+           "memory");
+}
+
+static inline void
+xrstor(struct savefpu *addr, uint64_t mask)
+{
+       uint32_t lo, hi;
+
+       lo = mask;
+       hi = mask >> 32;
+       __asm volatile("xrstor %0" : : "m" (*addr), "a" (lo), "d" (hi));
+}

#endif

Index: sys/arch/amd64/include/vmmvar.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
--- sys/arch/amd64/include/vmmvar.h     23 Mar 2017 08:05:58 -0000      1.32
+++ sys/arch/amd64/include/vmmvar.h     27 Apr 2017 06:16:39 -0000      1.33
@@ -638,6 +638,7 @@ struct vmx_gueststate
       uint64_t        vg_rip;                 /* 0x80 */
       uint32_t        vg_exit_reason;         /* 0x88 */
       uint64_t        vg_rflags;              /* 0x90 */
+       uint64_t        vg_xcr0;                /* 0x98 */
};

/*
@@ -649,6 +650,12 @@ struct vm;
 * Virtual CPU
 */
struct vcpu {
+       /*
+        * Guest FPU state - this must remain as the first member of the struct
+        * to ensure 64-byte alignment (set up during vcpu_pool init)
+        */
+       struct savefpu vc_g_fpu;
+
       /* VMCS / VMCB pointer */
       vaddr_t vc_control_va;
       uint64_t vc_control_pa;
@@ -673,6 +680,10 @@ struct vcpu {

       uint16_t vc_intr;
       uint8_t vc_irqready;
+
+       uint8_t vc_fpuinited;
+
+       uint64_t vc_h_xcr0;

       /* VMX only */
       uint64_t vc_vmx_basic;
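vc_g_fpu must be 64-byte aligned because XSAVE and XRSTOR fault on a
misaligned save area. The pool_init() change earlier in the patch (alignment 64
for vcpu_pool) together with keeping vc_g_fpu as the first member of struct
vcpu provides that guarantee. Illustrative sketch of the invariant (the
pool_get() flags shown are an assumption, not copied from vmm.c):

    struct vcpu *vcpu;

    vcpu = pool_get(&vcpu_pool, PR_WAITOK | PR_ZERO);
    /* first member + 64-byte pool alignment => a valid XSAVE/XRSTOR area */
    KASSERT(((vaddr_t)&vcpu->vc_g_fpu & 63) == 0);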