untrusted comment: signature from openbsd 6.2 base secret key
RWRVWzAMgtyg7h6Z/ES+ftCrC3y4jz05b9Q4N4uIZDqQEzb7lw6vB6BGumpp3us1ydI/8HGsYSlzPUl7ai/pMISPf6LswZDJZAI=

OpenBSD 6.2 errata 017, June 24, 2018:

Intel CPUs speculatively access FPU registers even when the FPU is disabled,
so data (including AES keys) from previous contexts could be discovered
when using the lazy-save approach.  Switch to the eager-save approach.
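
For background: with lazy saving the kernel leaves the old context's FPU
registers live across a switch, sets CR0_TS, and defers save/restore to the
resulting DNA (#NM) trap; speculation can read the stale registers before
that trap fires.  Eager saving stores the state at switch time and scrubs
the registers.  A standalone sketch of the two models (illustrative names
only, not kernel code):

/* sketch: lazy vs. eager FPU context switching (illustrative only) */
#include <string.h>

struct fpu { unsigned char regs[512]; };	/* stand-in for the xstate */
struct proc { struct fpu pcb_savefpu; };

static struct fpu cpu_fpu;			/* "the FPU registers" */
static const struct fpu clean_fpu;		/* pristine initial state */

static void
switch_lazy(struct proc *old)
{
	(void)old;		/* defer save to the DNA trap: cpu_fpu still
				 * holds old's secrets, speculatively readable */
}

static void
switch_eager(struct proc *old)
{
	memcpy(&old->pcb_savefpu, &cpu_fpu, sizeof(cpu_fpu));	/* save now */
	cpu_fpu = clean_fpu;		/* leave nothing behind to leak */
}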

Apply by doing:
   signify -Vep /etc/signify/openbsd-62-base.pub -x 017_intelfpu.patch.sig \
       -m - | (cd /usr/src && patch -p0)

And then rebuild and install the kernel:
   KK=`sysctl -n kern.osversion | cut -d# -f1`
   cd /usr/src/sys/arch/`machine`/compile/$KK
   make obj
   make config
   make
   make install

Index: sys/arch/amd64/amd64/acpi_machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_machdep.c,v
retrieving revision 1.78
diff -u -p -r1.78 acpi_machdep.c
--- sys/arch/amd64/amd64/acpi_machdep.c 27 Mar 2017 18:32:53 -0000      1.78
+++ sys/arch/amd64/amd64/acpi_machdep.c 21 Jun 2018 11:54:01 -0000
@@ -389,7 +389,7 @@ acpi_sleep_cpu(struct acpi_softc *sc, in
        */
       if (acpi_savecpu()) {
               /* Suspend path */
-               fpusave_cpu(curcpu(), 1);
+               KASSERT((curcpu()->ci_flags & CPUF_USERXSTATE) == 0);
               wbinvd();

#ifdef HIBERNATE
@@ -416,6 +416,7 @@ acpi_sleep_cpu(struct acpi_softc *sc, in
               return (ECANCELED);
       }
       /* Resume path */
+       fpureset();

       /* Reset the vectors */
       sc->sc_facs->wakeup_vector = 0;
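
With eager saving the user xstate was already flushed to the PCB when the
CPU last switched away from a process, so the suspend path above only needs
to assert that no user state is live, and the resume path reinitializes the
FPU with fpureset().  A minimal sketch of that invariant (the flag value is
a placeholder, not the real cpu.h definition):

#include <assert.h>

#define CPUF_USERXSTATE	0x01	/* placeholder: "FPU holds user state" */

static void
suspend_fpu_check(unsigned int ci_flags)
{
	/* eager switching saved and scrubbed at cpu_switchto() time */
	assert((ci_flags & CPUF_USERXSTATE) == 0);
}
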
Index: sys/arch/amd64/amd64/acpi_wakecode.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/acpi_wakecode.S,v
retrieving revision 1.41
diff -u -p -r1.41 acpi_wakecode.S
--- sys/arch/amd64/amd64/acpi_wakecode.S        30 Aug 2017 23:40:22 -0000      1.41
+++ sys/arch/amd64/amd64/acpi_wakecode.S        21 Jun 2018 11:54:01 -0000
@@ -217,7 +217,7 @@ _C_LABEL(acpi_protected_mode_resume):

       /* Reenable paging by setting the appropriate bits in CR0 */
       movl    %cr0,%eax
-       orl     $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP),%eax
+       orl     $CR0_DEFAULT,%eax
       movl    %eax,%cr0

       /* Flush the prefetch queue again */
Index: sys/arch/amd64/amd64/aesni.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/aesni.c,v
retrieving revision 1.42
diff -u -p -r1.42 aesni.c
--- sys/arch/amd64/amd64/aesni.c        8 Sep 2017 05:36:51 -0000       1.42
+++ sys/arch/amd64/amd64/aesni.c        21 Jun 2018 11:54:01 -0000
@@ -256,7 +256,9 @@ aesni_newsession(u_int32_t *sidp, struct
                       bzero(ses->ses_ghash->Z, GMAC_BLOCK_LEN);

                       /* prepare a hash subkey */
+                       fpu_kernel_enter();
                       aesni_enc(ses, ses->ses_ghash->H, ses->ses_ghash->H);
+                       fpu_kernel_exit();
                       break;

               case CRYPTO_MD5_HMAC:
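
With the DNA trap gone, every kernel code path that executes FPU/SSE
instructions (here, AES-NI) must bracket them explicitly:
fpu_kernel_enter() saves curproc's xstate if the CPU still holds it, and
fpu_kernel_exit() scrubs the registers afterwards.  A sketch of the
pattern (the wrapper is hypothetical; the enter/exit pair are the real
names used in this patch):

void fpu_kernel_enter(void);
void fpu_kernel_exit(void);

static void
kernel_simd_section(void (*simd_op)(void))
{
	fpu_kernel_enter();	/* save curproc's xstate if still live */
	simd_op();		/* may freely clobber FPU/SSE registers;
				 * must not sleep: a context switch would
				 * scrub the registers (see cpu_switchto) */
	fpu_kernel_exit();	/* reset registers to the clean state */
}
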
Index: sys/arch/amd64/amd64/autoconf.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/autoconf.c,v
retrieving revision 1.49
diff -u -p -r1.49 autoconf.c
--- sys/arch/amd64/amd64/autoconf.c     20 Jun 2017 21:05:46 -0000      1.49
+++ sys/arch/amd64/amd64/autoconf.c     21 Jun 2018 11:54:01 -0000
@@ -138,10 +138,6 @@ cpu_configure(void)

       unmap_startup();

-#ifdef MULTIPROCESSOR
-       cpu_init_idle_pcbs();
-#endif
-
       lcr8(0);
       spl0();
       cold = 0;
Index: sys/arch/amd64/amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.107.2.1
diff -u -p -r1.107.2.1 cpu.c
--- sys/arch/amd64/amd64/cpu.c  26 Feb 2018 12:29:48 -0000      1.107.2.1
+++ sys/arch/amd64/amd64/cpu.c  21 Jun 2018 11:54:01 -0000
@@ -70,6 +70,7 @@
#include "pvbus.h"

#include <sys/param.h>
+#include <sys/proc.h>
#include <sys/timeout.h>
#include <sys/systm.h>
#include <sys/device.h>
@@ -77,6 +78,7 @@
#include <sys/memrange.h>
#include <dev/rndvar.h>
#include <sys/atomic.h>
+#include <sys/user.h>

#include <uvm/uvm_extern.h>

@@ -409,7 +411,6 @@ cpu_attach(struct device *parent, struct
       pcb->pcb_kstack = kstack + USPACE - 16;
       pcb->pcb_rbp = pcb->pcb_rsp = kstack + USPACE - 16;
       pcb->pcb_pmap = pmap_kernel();
-       pcb->pcb_cr0 = rcr0();
       pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdirpa;
#endif

@@ -491,6 +492,28 @@ cpu_attach(struct device *parent, struct
#endif /* NVMM > 0 */
}

+static void
+replacexsave(void)
+{
+       extern long _xrstor, _xsave, _xsaveopt;
+       u_int32_t eax, ebx, ecx, edx;
+       static int replacedone = 0;
+       int s;
+
+       if (replacedone)
+               return;
+       replacedone = 1;
+
+       /* find out whether xsaveopt is supported */
+       CPUID_LEAF(0xd, 1, eax, ebx, ecx, edx);
+       s = splhigh();
+       codepatch_replace(CPTAG_XRSTOR, &_xrstor, 4);
+       codepatch_replace(CPTAG_XSAVE,
+           (eax & XSAVE_XSAVEOPT) ? &_xsaveopt : &_xsave, 4);
+       splx(s);
+}
+
+
/*
 * Initialize the processor appropriately.
 */
@@ -498,6 +521,7 @@ cpu_attach(struct device *parent, struct
void
cpu_init(struct cpu_info *ci)
{
+       struct savefpu *sfp;
       u_int cr4;

       /* configure the CPU if needed */
@@ -509,7 +533,6 @@ cpu_init(struct cpu_info *ci)
        */
       patinit(ci);

-       lcr0(rcr0() | CR0_WP);
       cr4 = rcr4() | CR4_DEFAULT;
       if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMEP)
               cr4 |= CR4_SMEP;
@@ -519,7 +542,7 @@ cpu_init(struct cpu_info *ci)
               cr4 |= CR4_FSGSBASE;
       if (ci->ci_feature_sefflags_ecx & SEFF0ECX_UMIP)
               cr4 |= CR4_UMIP;
-       if (cpu_ecxfeature & CPUIDECX_XSAVE)
+       if ((cpu_ecxfeature & CPUIDECX_XSAVE) && cpuid_level >= 0xd)
               cr4 |= CR4_OSXSAVE;
       lcr4(cr4);

@@ -532,9 +555,25 @@ cpu_init(struct cpu_info *ci)
                       xsave_mask |= XCR0_AVX;
               xsetbv(0, xsave_mask);
               CPUID_LEAF(0xd, 0, eax, ebx, ecx, edx);
-               fpu_save_len = ebx;
+               if (CPU_IS_PRIMARY(ci)) {
+                       fpu_save_len = ebx;
+                       KASSERT(fpu_save_len <= sizeof(struct savefpu));
+               } else {
+                       KASSERT(ebx == fpu_save_len);
+               }
+
+               replacexsave();
       }

+       /* Give proc0 a clean FPU save area */
+       sfp = &proc0.p_addr->u_pcb.pcb_savefpu;
+       memset(sfp, 0, fpu_save_len);
+       if (xsave_mask) {
+               /* must not use xsaveopt here */
+               xsave(sfp, xsave_mask);
+       } else
+               fxsave(sfp);
+
#if NVMM > 0
       /* Re-enable VMM if needed */
       if (ci->ci_flags & CPUF_VMM)
@@ -602,24 +641,6 @@ cpu_boot_secondary_processors(void)
}

void
-cpu_init_idle_pcbs(void)
-{
-       struct cpu_info *ci;
-       u_long i;
-
-       for (i=0; i < MAXCPUS; i++) {
-               ci = cpu_info[i];
-               if (ci == NULL)
-                       continue;
-               if (ci->ci_idle_pcb == NULL)
-                       continue;
-               if ((ci->ci_flags & CPUF_PRESENT) == 0)
-                       continue;
-               x86_64_init_pcb_tss_ldt(ci);
-       }
-}
-
-void
cpu_start_secondary(struct cpu_info *ci)
{
       int i;
@@ -738,7 +759,6 @@ cpu_hatch(void *v)
               panic("%s: already running!?", ci->ci_dev->dv_xname);
#endif

-       lcr0(ci->ci_idle_pcb->pcb_cr0);
       cpu_init_idt();
       lapic_set_lvt();
       gdt_init_cpu(ci);
@@ -780,15 +800,14 @@ cpu_debug_dump(void)
       struct cpu_info *ci;
       CPU_INFO_ITERATOR cii;

-       db_printf("addr         dev     id      flags   ipis    curproc         fpcurproc\n");
+       db_printf("addr         dev     id      flags   ipis    curproc\n");
       CPU_INFO_FOREACH(cii, ci) {
-               db_printf("%p   %s      %u      %x      %x      %10p    %10p\n",
+               db_printf("%p   %s      %u      %x      %x      %10p\n",
                   ci,
                   ci->ci_dev == NULL ? "BOOT" : ci->ci_dev->dv_xname,
                   ci->ci_cpuid,
                   ci->ci_flags, ci->ci_ipis,
-                   ci->ci_curproc,
-                   ci->ci_fpcurproc);
+                   ci->ci_curproc);
       }
}
#endif
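
Two details above are easy to miss.  replacexsave() reads CPUID leaf 0xd,
sub-leaf 1, where EAX bit 0 advertises XSAVEOPT, and patches the
save/restore stubs exactly once.  And proc0's clean save area is written
with plain xsave because xsaveopt may skip components it considers
unmodified, which would leave the template incomplete -- hence the "must
not use xsaveopt here" comment.  A userland sketch of the probe
(illustrative, not kernel code):

#include <stdint.h>

#define XSAVE_XSAVEOPT	0x01U	/* CPUID.(EAX=0xd,ECX=1):EAX bit 0 */

static uint32_t
xsave_subfeatures(void)
{
	uint32_t eax, ebx, ecx, edx;

	__asm volatile("cpuid"
	    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
	    : "a" (0xd), "c" (1));
	return (eax);		/* test against XSAVE_XSAVEOPT etc. */
}
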
Index: sys/arch/amd64/amd64/db_interface.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/db_interface.c,v
retrieving revision 1.29
diff -u -p -r1.29 db_interface.c
--- sys/arch/amd64/amd64/db_interface.c 19 Jul 2017 14:34:10 -0000      1.29
+++ sys/arch/amd64/amd64/db_interface.c 21 Jun 2018 11:54:01 -0000
@@ -66,8 +66,8 @@
#endif

extern label_t *db_recover;
-extern char *trap_type[];
-extern int trap_types;
+extern const char * const trap_type[];
+extern const int trap_types;

#ifdef MULTIPROCESSOR
struct mutex ddb_mp_mutex =
Index: sys/arch/amd64/amd64/fpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/fpu.c,v
retrieving revision 1.37
diff -u -p -r1.37 fpu.c
--- sys/arch/amd64/amd64/fpu.c  4 Oct 2017 02:10:33 -0000       1.37
+++ sys/arch/amd64/amd64/fpu.c  21 Jun 2018 11:54:01 -0000
@@ -53,35 +53,13 @@
#include <machine/specialreg.h>
#include <machine/fpu.h>

-#include <dev/isa/isavar.h>
-
-int    xrstor_user(struct savefpu *_addr, uint64_t _mask);
void   trap(struct trapframe *);

/*
- * We do lazy initialization and switching using the TS bit in cr0 and the
- * MDP_USEDFPU bit in mdproc.
- *
- * DNA exceptions are handled like this:
- *
- * 1) If there is no FPU, return and go to the emulator.
- * 2) If someone else has used the FPU, save its state into that process' PCB.
- * 3a) If MDP_USEDFPU is not set, set it and initialize the FPU.
- * 3b) Otherwise, reload the process' previous FPU state.
- *
- * When a process is created or exec()s, its saved cr0 image has the TS bit
- * set and the MDP_USEDFPU bit clear.  The MDP_USEDFPU bit is set when the
- * process first gets a DNA and the FPU is initialized.  The TS bit is turned
- * off when the FPU is used, and turned on again later when the process' FPU
- * state is saved.
- */
-
-/*
 * The mask of enabled XSAVE features.
 */
uint64_t       xsave_mask;

-void fpudna(struct cpu_info *, struct trapframe *);
static int x86fpflags_to_siginfo(u_int32_t);

/*
@@ -101,7 +79,6 @@ uint32_t     fpu_mxcsr_mask;
void
fpuinit(struct cpu_info *ci)
{
-       lcr0(rcr0() & ~(CR0_EM|CR0_TS));
       fninit();
       if (fpu_mxcsr_mask == 0) {
               struct fxsave64 fx __attribute__((aligned(16)));
@@ -113,7 +90,6 @@ fpuinit(struct cpu_info *ci)
               else
                       fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
       }
-       lcr0(rcr0() | (CR0_TS));
}

/*
@@ -126,23 +102,18 @@ fpuinit(struct cpu_info *ci)
void
fputrap(struct trapframe *frame)
{
-       struct proc *p = curcpu()->ci_fpcurproc;
+       struct cpu_info *ci = curcpu();
+       struct proc *p = curproc;
       struct savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu;
       u_int32_t mxcsr, statbits;
       u_int16_t cw;
       int code;
       union sigval sv;

-#ifdef DIAGNOSTIC
-       /*
-        * At this point, fpcurproc should be curproc.  If it wasn't,
-        * the TS bit should be set, and we should have gotten a DNA exception.
-        */
-       if (p != curproc)
-               panic("fputrap: wrong proc");
-#endif
+       KASSERT(ci->ci_flags & CPUF_USERXSTATE);
+       ci->ci_flags &= ~CPUF_USERXSTATE;
+       fpusavereset(sfp);

-       fxsave(sfp);
       if (frame->tf_trapno == T_XMM) {
               mxcsr = sfp->fp_fxsave.fx_mxcsr;
               statbits = mxcsr;
@@ -187,211 +158,21 @@ x86fpflags_to_siginfo(u_int32_t flags)
        return (FPE_FLTINV);
}

-/*
- * Implement device not available (DNA) exception
- *
- * If we were the last process to use the FPU, we can simply return.
- * Otherwise, we save the previous state, if necessary, and restore our last
- * saved state.
- */
-void
-fpudna(struct cpu_info *ci, struct trapframe *frame)
-{
-       struct savefpu *sfp;
-       struct proc *p;
-       int s;
-
-       if (ci->ci_fpsaving) {
-               printf("recursive fpu trap; cr0=%x\n", rcr0());
-               return;
-       }
-
-       s = splipi();
-
-#ifdef MULTIPROCESSOR
-       p = ci->ci_curproc;
-#else
-       p = curproc;
-#endif
-
-       /*
-        * Initialize the FPU state to clear any exceptions.  If someone else
-        * was using the FPU, save their state.
-        */
-       if (ci->ci_fpcurproc != NULL && ci->ci_fpcurproc != p) {
-               fpusave_cpu(ci, ci->ci_fpcurproc != &proc0);
-               uvmexp.fpswtch++;
-       }
-       splx(s);
-
-       if (p == NULL) {
-               clts();
-               return;
-       }
-
-       KDASSERT(ci->ci_fpcurproc == NULL);
-#ifndef MULTIPROCESSOR
-       KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
-#else
-       if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
-               fpusave_proc(p, 1);
-#endif
-
-       p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
-       clts();
-
-       s = splipi();
-       ci->ci_fpcurproc = p;
-       p->p_addr->u_pcb.pcb_fpcpu = ci;
-       splx(s);
-
-       sfp = &p->p_addr->u_pcb.pcb_savefpu;
-
-       if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
-               fninit();
-               bzero(&sfp->fp_fxsave, sizeof(sfp->fp_fxsave));
-               sfp->fp_fxsave.fx_fcw = __INITIAL_NPXCW__;
-               sfp->fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
-               fxrstor(&sfp->fp_fxsave);
-               p->p_md.md_flags |= MDP_USEDFPU;
-       } else {
-               if (xsave_mask) {
-                       if (xrstor_user(sfp, xsave_mask)) {
-                               fpusave_proc(p, 0);     /* faulted */
-                               frame->tf_trapno = T_PROTFLT;
-                               trap(frame);
-                               return;
-                       }
-               } else {
-                       static double   zero = 0.0;
-
-                       /*
-                        * amd fpu does not restore fip, fdp, fop on fxrstor
-                        * thus leaking other process's execution history.
-                        */
-                       fnclex();
-                       __asm volatile("ffree %%st(7)\n\tfldl %0" : : "m" (zero));
-                       fxrstor(sfp);
-               }
-       }
-}
-
-
-void
-fpusave_cpu(struct cpu_info *ci, int save)
-{
-       struct proc *p;
-       int s;
-
-       KDASSERT(ci == curcpu());
-
-       p = ci->ci_fpcurproc;
-       if (p == NULL)
-               return;
-
-       if (save) {
-#ifdef DIAGNOSTIC
-               if (ci->ci_fpsaving != 0)
-                       panic("fpusave_cpu: recursive save!");
-#endif
-               /*
-                * Set ci->ci_fpsaving, so that any pending exception will be
-                * thrown away.  (It will be caught again if/when the FPU
-                * state is restored.)
-                */
-               clts();
-               ci->ci_fpsaving = 1;
-               if (xsave_mask)
-                       xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask);
-               else
-                       fxsave(&p->p_addr->u_pcb.pcb_savefpu);
-               ci->ci_fpsaving = 0;
-       }
-
-       stts();
-       p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
-
-       s = splipi();
-       p->p_addr->u_pcb.pcb_fpcpu = NULL;
-       ci->ci_fpcurproc = NULL;
-       splx(s);
-}
-
-/*
- * Save p's FPU state, which may be on this processor or another processor.
- */
-void
-fpusave_proc(struct proc *p, int save)
-{
-       struct cpu_info *ci = curcpu();
-       struct cpu_info *oci;
-
-       KDASSERT(p->p_addr != NULL);
-
-       oci = p->p_addr->u_pcb.pcb_fpcpu;
-       if (oci == NULL)
-               return;
-
-#if defined(MULTIPROCESSOR)
-       if (oci == ci) {
-               int s = splipi();
-               fpusave_cpu(ci, save);
-               splx(s);
-       } else {
-               oci->ci_fpsaveproc = p;
-               x86_send_ipi(oci,
-                   save ? X86_IPI_SYNCH_FPU : X86_IPI_FLUSH_FPU);
-               while (p->p_addr->u_pcb.pcb_fpcpu != NULL)
-                       CPU_BUSY_CYCLE();
-       }
-#else
-       KASSERT(ci->ci_fpcurproc == p);
-       fpusave_cpu(ci, save);
-#endif
-}
-
void
fpu_kernel_enter(void)
{
-       struct cpu_info *ci = curcpu();
-       uint32_t         cw;
-       int              s;
-
-       /*
-        * Fast path.  If the kernel was using the FPU before, there
-        * is no work to do besides clearing TS.
-        */
-       if (ci->ci_fpcurproc == &proc0) {
-               clts();
-               return;
-       }
-
-       s = splipi();
+       struct cpu_info *ci = curcpu();

-       if (ci->ci_fpcurproc != NULL) {
-               fpusave_cpu(ci, 1);
-               uvmexp.fpswtch++;
+       /* save curproc's FPU state if we haven't already */
+       if (ci->ci_flags & CPUF_USERXSTATE) {
+               ci->ci_flags &= ~CPUF_USERXSTATE;
+               fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
       }
-
-       /* Claim the FPU */
-       ci->ci_fpcurproc = &proc0;
-
-       splx(s);
-
-       /* Disable DNA exceptions */
-       clts();
-
-       /* Initialize the FPU */
-       fninit();
-       cw = __INITIAL_NPXCW__;
-       fldcw(&cw);
-       cw = __INITIAL_MXCSR__;
-       ldmxcsr(&cw);
}

void
fpu_kernel_exit(void)
{
-       /* Enable DNA exceptions */
-       stts();
+       /* make sure we don't leave anything in the registers */
+       fpureset();
}
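
The whole eager scheme hangs off one per-CPU bit: CPUF_USERXSTATE set
means the FPU registers hold curproc's userspace state, clear means they
hold the clean image.  fputrap() and fpu_kernel_enter() above clear it
after saving; the return-to-userspace paths in locore.S set it after
restoring.  A sketch of the state machine (illustrative):

/* per-CPU FPU ownership under eager switching (sketch) */
enum fpu_contents {
	FPU_CLEAN,	/* clean proc0 image; CPUF_USERXSTATE clear */
	FPU_USER,	/* curproc's user xstate; CPUF_USERXSTATE set */
};

/*
 * FPU_USER  -> FPU_CLEAN: cpu_switchto, fputrap, fpu_kernel_enter,
 *                         sendsig -- save and/or scrub (fpusavereset)
 * FPU_CLEAN -> FPU_USER:  iretq/sysretq return paths (xrstor of the
 *                         PCB save area), or when the reset image is
 *                         declared to be the user state (sendsig,
 *                         setregs)
 */
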
Index: sys/arch/amd64/amd64/genassym.cf
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/genassym.cf,v
retrieving revision 1.31.8.1
diff -u -p -r1.31.8.1 genassym.cf
--- sys/arch/amd64/amd64/genassym.cf    26 Feb 2018 12:29:48 -0000      1.31.8.1
+++ sys/arch/amd64/amd64/genassym.cf    21 Jun 2018 11:54:01 -0000
@@ -94,9 +94,8 @@ member        pcb_rbp
member pcb_kstack
member pcb_fsbase
member pcb_onfault
-member pcb_fpcpu
member pcb_pmap
-member pcb_cr0
+member pcb_savefpu

struct pmap
member pm_cpus
@@ -131,7 +130,8 @@ member      CPU_INFO_USER_CR3       ci_user_cr3
member CPU_INFO_KERN_RSP       ci_kern_rsp
member CPU_INFO_INTR_RSP       ci_intr_rsp

-export CPUF_USERSEGS_BIT
+export CPUF_USERSEGS
+export CPUF_USERXSTATE

struct intrsource
member is_recurse
Index: sys/arch/amd64/amd64/identcpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v
retrieving revision 1.87.2.1
diff -u -p -r1.87.2.1 identcpu.c
--- sys/arch/amd64/amd64/identcpu.c     26 Feb 2018 12:29:48 -0000      1.87.2.1
+++ sys/arch/amd64/amd64/identcpu.c     21 Jun 2018 11:54:01 -0000
@@ -217,6 +217,11 @@ const struct {
       { CPUIDEDX_ITSC,        "ITSC" },
}, cpu_amdspec_ebxfeatures[] = {
       { CPUIDEBX_IBPB,        "IBPB" },
+}, cpu_xsave_extfeatures[] = {
+       { XSAVE_XSAVEOPT,       "XSAVEOPT" },
+       { XSAVE_XSAVEC,         "XSAVEC" },
+       { XSAVE_XGETBV1,        "XGETBV1" },
+       { XSAVE_XSAVES,         "XSAVES" },
};

int
@@ -651,6 +656,14 @@ identifycpu(struct cpu_info *ci)
                                       printf(",%s",
                                           cpu_amdspec_ebxfeatures[i].str);
               }
+       }
+
+       /* xsave subfeatures */
+       if (cpuid_level >= 0xd) {
+               CPUID_LEAF(0xd, 1, val, dummy, dummy, dummy);
+               for (i = 0; i < nitems(cpu_xsave_extfeatures); i++)
+                       if (val & cpu_xsave_extfeatures[i].bit)
+                               printf(",%s", cpu_xsave_extfeatures[i].str);
       }

       if (cpu_meltdown)
Index: sys/arch/amd64/amd64/ipifuncs.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/ipifuncs.c,v
retrieving revision 1.28
diff -u -p -r1.28 ipifuncs.c
--- sys/arch/amd64/amd64/ipifuncs.c     23 Nov 2015 22:57:12 -0000      1.28
+++ sys/arch/amd64/amd64/ipifuncs.c     21 Jun 2018 11:54:01 -0000
@@ -62,9 +62,6 @@
void x86_64_ipi_nop(struct cpu_info *);
void x86_64_ipi_halt(struct cpu_info *);

-void x86_64_ipi_synch_fpu(struct cpu_info *);
-void x86_64_ipi_flush_fpu(struct cpu_info *);
-
#if NVMM > 0
void x86_64_ipi_start_vmm(struct cpu_info *);
void x86_64_ipi_stop_vmm(struct cpu_info *);
@@ -85,8 +82,8 @@ void (*ipifunc[X86_NIPI])(struct cpu_inf
{
       x86_64_ipi_halt,
       x86_64_ipi_nop,
-       x86_64_ipi_flush_fpu,
-       x86_64_ipi_synch_fpu,
+       NULL,
+       NULL,
       NULL,
       x86_64_ipi_reload_mtrr,
       x86_setperf_ipi,
@@ -115,7 +112,6 @@ x86_64_ipi_halt(struct cpu_info *ci)
       SCHED_ASSERT_UNLOCKED();
       KASSERT(!__mp_lock_held(&kernel_lock));

-       fpusave_cpu(ci, 1);
       disable_intr();
       lapic_disable();
       wbinvd();
@@ -125,20 +121,6 @@ x86_64_ipi_halt(struct cpu_info *ci)
       for(;;) {
               __asm volatile("hlt");
       }
-}
-
-void
-x86_64_ipi_flush_fpu(struct cpu_info *ci)
-{
-       if (ci->ci_fpsaveproc == ci->ci_fpcurproc)
-               fpusave_cpu(ci, 0);
-}
-
-void
-x86_64_ipi_synch_fpu(struct cpu_info *ci)
-{
-       if (ci->ci_fpsaveproc == ci->ci_fpcurproc)
-               fpusave_cpu(ci, 1);
}

#ifdef MTRR
Index: sys/arch/amd64/amd64/locore.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v
retrieving revision 1.89.2.1
diff -u -p -r1.89.2.1 locore.S
--- sys/arch/amd64/amd64/locore.S       26 Feb 2018 12:29:48 -0000      1.89.2.1
+++ sys/arch/amd64/amd64/locore.S       21 Jun 2018 11:54:01 -0000
@@ -113,10 +113,11 @@
#include <sys/syscall.h>

#include <machine/param.h>
+#include <machine/codepatch.h>
#include <machine/psl.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
-#include <machine/trap.h>
+#include <machine/trap.h>                      /* T_PROTFLT */
#include <machine/frameasm.h>

#if NLAPIC > 0
@@ -345,7 +346,12 @@ ENTRY(cpu_switchto)
       movb    $SONPROC,P_STAT(%r12)   # p->p_stat = SONPROC
       SET_CURPROC(%r12,%rcx)

-       movl    CPUVAR(CPUID),%edi
+       movl    CPUVAR(CPUID),%r9d
+
+       /* for the FPU/"extended CPU state" handling below */
+       movq    xsave_mask(%rip),%rdx
+       movl    %edx,%eax
+       shrq    $32,%rdx

       /* If old proc exited, don't bother. */
       testq   %r13,%r13
@@ -358,7 +364,7 @@ ENTRY(cpu_switchto)
        *   %rax, %rcx - scratch
        *   %r13 - old proc, then old pcb
        *   %r12 - new proc
-        *   %edi - cpuid
+        *   %r9d - cpuid
        */

       movq    P_ADDR(%r13),%r13
@@ -366,16 +372,46 @@ ENTRY(cpu_switchto)
       /* clear the old pmap's bit for the cpu */
       movq    PCB_PMAP(%r13),%rcx
       lock
-       btrq    %rdi,PM_CPUS(%rcx)
+       btrq    %r9,PM_CPUS(%rcx)

       /* Save stack pointers. */
       movq    %rsp,PCB_RSP(%r13)
       movq    %rbp,PCB_RBP(%r13)

+       /*
+        * If the old proc ran in userspace then save the
+        * floating-point/"extended state" registers
+        */
+       testl   $CPUF_USERXSTATE,CPUVAR(FLAGS)
+       jz      .Lxstate_reset
+
+       movq    %r13, %rdi
+#if PCB_SAVEFPU != 0
+       addq    $PCB_SAVEFPU,%rdi
+#endif
+       CODEPATCH_START
+       .byte 0x48; fxsave      (%rdi)          /* really fxsave64 */
+       CODEPATCH_END(CPTAG_XSAVE)
+
switch_exited:
-       /* did old proc run in userspace?  then reset the segment regs */
-       btrl    $CPUF_USERSEGS_BIT, CPUVAR(FLAGS)
-       jnc     restore_saved
+       /* now clear the xstate */
+       movq    proc0paddr(%rip),%rdi
+#if PCB_SAVEFPU != 0
+       addq    $PCB_SAVEFPU,%rdi
+#endif
+       CODEPATCH_START
+       .byte 0x48; fxrstor     (%rdi)          /* really fxrstor64 */
+       CODEPATCH_END(CPTAG_XRSTOR)
+       andl    $~CPUF_USERXSTATE,CPUVAR(FLAGS)
+
+.Lxstate_reset:
+       /*
+        * If the segment registers haven't been reset since the old proc
+        * ran in userspace then reset them now
+        */
+       testl   $CPUF_USERSEGS,CPUVAR(FLAGS)
+       jz      restore_saved
+       andl    $~CPUF_USERSEGS,CPUVAR(FLAGS)

       /* set %ds, %es, %fs, and %gs to expected value to prevent info leak */
       movw    $(GSEL(GUDATA_SEL, SEL_UPL)),%ax
@@ -432,32 +468,17 @@ restore_saved:
0:

       /* set the new pmap's bit for the cpu */
-       movl    CPUVAR(CPUID),%edi
       lock
-       btsq    %rdi,PM_CPUS(%rcx)
+       btsq    %r9,PM_CPUS(%rcx)
#ifdef DIAGNOSTIC
       jc      _C_LABEL(switch_pmcpu_set)
#endif

switch_restored:
-       /* Restore cr0 (including FPU state). */
-       movl    PCB_CR0(%r13),%ecx
-#ifdef MULTIPROCESSOR
-       movq    PCB_FPCPU(%r13),%r8
-       cmpq    CPUVAR(SELF),%r8
-       jz      1f
-       orl     $CR0_TS,%ecx
-1:
-#endif
-       movq    %rcx,%cr0
-
       SET_CURPCB(%r13)

       /* Interrupts are okay again. */
       sti
-
-switch_return:
-
       popq    %r15
       popq    %r14
       popq    %r13
@@ -497,7 +518,7 @@ ENTRY(cpu_idle_leave)

#ifdef DIAGNOSTIC
NENTRY(switch_pmcpu_set)
-       movabsq $switch_active,%rdi
+       leaq    switch_active(%rip),%rdi
       call    _C_LABEL(panic)
       /* NOTREACHED */

@@ -529,7 +550,7 @@ IDTVEC(syscall)
        * %rip and the original rflags has been copied to %r11.  %cs and
        * %ss have been updated to the kernel segments, but %rsp is still
        * the user-space value.
-        * First order of business is to swap to the kernel gs.base so that
+        * First order of business is to swap to the kernel GS.base so that
        * we can access our struct cpu_info and use the scratch space there
        * to switch to the kernel page tables (thank you, Intel), then
        * switch to our kernel stack.  Once that's in place we can
@@ -563,7 +584,7 @@ NENTRY(Xsyscall_untramp)
       movq    %r11, TF_RFLAGS(%rsp)   /* old rflags from syscall insn */
       movq    $(GSEL(GUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
       movq    %rcx,TF_RIP(%rsp)
-       movq    $2,TF_ERR(%rsp)         /* ignored */
+       movq    %rax,TF_ERR(%rsp)       /* stash syscall # for SPL check */

       movq    CPUVAR(CURPROC),%r14
       movq    %rsp,P_MD_REGS(%r14)    # save pointer to frame
@@ -590,8 +611,17 @@ NENTRY(Xsyscall_untramp)

       /* Could registers have been changed that require an iretq? */
       testl   $MDP_IRET, P_MD_FLAGS(%r14)
-       jne     intr_fast_exit
+       jne     intr_user_exit_post_ast
+
+       /* Restore FPU/"extended CPU state" if it's not already in the CPU */
+       testl   $CPUF_USERXSTATE,CPUVAR(FLAGS)
+       jz      .Lsyscall_restore_xstate
+
+       /* Restore FS.base if it's not already in the CPU */
+       testl   $CPUF_USERSEGS,CPUVAR(FLAGS)
+       jz      .Lsyscall_restore_fsbase

+.Lsyscall_restore_registers:
       movq    TF_RDI(%rsp),%rdi
       movq    TF_RSI(%rsp),%rsi
       movq    TF_R8(%rsp),%r8
@@ -604,17 +634,6 @@ NENTRY(Xsyscall_untramp)
       movq    TF_RBP(%rsp),%rbp
       movq    TF_RBX(%rsp),%rbx

-       /* Restore FS.base if it's not already in the CPU */
-       btsl    $CPUF_USERSEGS_BIT,CPUVAR(FLAGS)
-       jc      99f
-       movq    CPUVAR(CURPCB),%rdx
-       movq    PCB_FSBASE(%rdx),%rax
-       movq    %rax,%rdx
-       shrq    $32,%rdx
-       movl    $MSR_FSBASE,%ecx
-       wrmsr
-99:
-
       /*
        * We need to finish reading from the trapframe, then switch
        * to the user page tables, swapgs, and return.  We need
@@ -642,11 +661,42 @@ KUENTRY(syscall_trampback)
       sysretq

       .text
+       .align  16,0xcc
+       /* in this case, need FS.base but not xstate, rarely happens */
+.Lsyscall_restore_fsbase:      /* CPU doesn't have curproc's FS.base */
+       orl     $CPUF_USERSEGS,CPUVAR(FLAGS)
+       movq    CPUVAR(CURPCB),%rdi
+       jmp     .Lsyscall_restore_fsbase_real
+
+       .align  16,0xcc
+.Lsyscall_restore_xstate:      /* CPU doesn't have curproc's xstate */
+       orl     $(CPUF_USERXSTATE|CPUF_USERSEGS),CPUVAR(FLAGS)
+       movq    CPUVAR(CURPCB),%rdi
+       movq    xsave_mask(%rip),%rdx
+       movl    %edx,%eax
+       shrq    $32,%rdx
+#if PCB_SAVEFPU != 0
+       addq    $PCB_SAVEFPU,%rdi
+#endif
+       /* untouched state so can't fault */
+       CODEPATCH_START
+       .byte 0x48; fxrstor     (%rdi)          /* really fxrstor64 */
+       CODEPATCH_END(CPTAG_XRSTOR)
+#if PCB_SAVEFPU != 0
+       subq    $PCB_SAVEFPU,%rdi
+#endif
+.Lsyscall_restore_fsbase_real:
+       movq    PCB_FSBASE(%rdi),%rdx
+       movl    %edx,%eax
+       shrq    $32,%rdx
+       movl    $MSR_FSBASE,%ecx
+       wrmsr
+       jmp     .Lsyscall_restore_registers

#ifdef DIAGNOSTIC
.Lsyscall_spl_not_lowered:
-       movabsq $spl_lowered, %rdi
-       movl    TF_RAX(%rsp),%esi
+       leaq    spl_lowered(%rip), %rdi
+       movl    TF_ERR(%rsp),%esi       /* syscall # stashed above */
       movl    TF_RDI(%rsp),%edx
       movl    %ebx,%ecx
       movl    CPUVAR(ILEVEL),%r8d
@@ -676,15 +726,54 @@ NENTRY(proc_trampoline)


/*
- * Return via iretq, for real interrupts and signal returns
+ * Returning to userspace via iretq.  We do things in this order:
+ *  - check for ASTs
+ *  - restore FPU/"extended CPU state" if it's not already in the CPU
+ *  - DIAGNOSTIC: no more C calls after this, so check the SPL
+ *  - restore FS.base if it's not already in the CPU
+ *  - restore most registers
+ *  - update the iret frame from the trapframe
+ *  - finish reading from the trapframe
+ *  - switch to the trampoline stack   \
+ *  - jump to the .kutext segment      |-- Meltdown workaround
+ *  - switch to the user page tables   /
+ *  - swapgs
+ *  - iretq
 */
-NENTRY(intr_fast_exit)
+NENTRY(intr_user_exit)
#ifdef DIAGNOSTIC
       pushfq
       popq    %rdx
       testq   $PSL_I,%rdx
-       jnz     .Lintr_exit_not_blocked
+       jnz     .Lintr_user_exit_not_blocked
+#endif /* DIAGNOSTIC */
+
+       /* Check for ASTs */
+       CHECK_ASTPENDING(%r11)
+       je      intr_user_exit_post_ast
+       CLEAR_ASTPENDING(%r11)
+       sti
+       movq    %rsp,%rdi
+       call    _C_LABEL(ast)
+       cli
+       jmp     intr_user_exit
+
+intr_user_exit_post_ast:
+       /* Restore FPU/"extended CPU state" if it's not already in the CPU */
+       testl   $CPUF_USERXSTATE,CPUVAR(FLAGS)
+       jz      .Lintr_restore_xstate
+
+#ifdef DIAGNOSTIC
+       /* no more C calls after this, so check the SPL */
+       cmpl    $0,CPUVAR(ILEVEL)
+       jne     .Luser_spl_not_lowered
#endif /* DIAGNOSTIC */
+
+       /* Restore FS.base if it's not already in the CPU */
+       testl   $CPUF_USERSEGS,CPUVAR(FLAGS)
+       jz      .Lintr_restore_fsbase
+
+.Lintr_restore_registers:
       movq    TF_RDI(%rsp),%rdi
       movq    TF_RSI(%rsp),%rsi
       movq    TF_R8(%rsp),%r8
@@ -697,30 +786,7 @@ NENTRY(intr_fast_exit)
       movq    TF_RBP(%rsp),%rbp
       movq    TF_RBX(%rsp),%rbx

-       testq   $SEL_RPL,TF_CS(%rsp)
-       je      intr_exit_recurse               /* returning back to kernel? */
-
-       /* returning to userspace.  XXX fix up iret frame here */
-
-       /* restore FS.base if it's not already in the CPU */
-       btsl    $CPUF_USERSEGS_BIT,CPUVAR(FLAGS)
-       jc      99f
-       movq    CPUVAR(CURPCB),%rdx             /* for below */
-       movq    PCB_FSBASE(%rdx),%rax
-       movq    %rax,%rdx
-       shrq    $32,%rdx
-       movl    $MSR_FSBASE,%ecx
-       wrmsr
-99:
       /*
-        * Returning to userspace.  We need to go things in this order:
-        *  - update the iret frame from the trapframe
-        *  - finish reading from the trapframe
-        *  - switch to the trampoline stack
-        *  - jump to the .kutext segment
-        *  - switch to the user page tables
-        *  - swapgs
-        *  - iretq
        * To get the final value for the register that was used
        * for the mov to %cr3, we need access to somewhere accessible
        * on the user page tables, so we save it in CPUVAR(SCRATCH)
@@ -758,7 +824,101 @@ KUENTRY(iretq_tramp)
_C_LABEL(doreti_iret):
       iretq

-NENTRY(intr_exit_recurse)
+       .text
+       .align  16,0xcc
+.Lintr_restore_xstate:         /* CPU doesn't have curproc's xstate */
+       orl     $CPUF_USERXSTATE,CPUVAR(FLAGS)
+       movq    CPUVAR(CURPCB),%rdi
+#if PCB_SAVEFPU != 0
+       addq    $PCB_SAVEFPU,%rdi
+#endif
+       movq    xsave_mask(%rip),%rsi
+       call    xrstor_user
+       testl   %eax,%eax
+       jnz     .Lintr_xrstor_faulted
+.Lintr_restore_fsbase:         /* CPU doesn't have curproc's FS.base */
+       orl     $CPUF_USERSEGS,CPUVAR(FLAGS)
+       movq    CPUVAR(CURPCB),%rdx
+       movq    PCB_FSBASE(%rdx),%rdx
+       movl    %edx,%eax
+       shrq    $32,%rdx
+       movl    $MSR_FSBASE,%ecx
+       wrmsr
+       jmp     .Lintr_restore_registers
+
+.Lintr_xrstor_faulted:
+       /*
+        * xrstor faulted; we need to reset the FPU state and call trap()
+        * to post a signal, which requires interrupts be enabled.
+        */
+       sti
+       movq    proc0paddr(%rip),%rdi
+#if PCB_SAVEFPU != 0
+       addq    $PCB_SAVEFPU,%rdi
+#endif
+       CODEPATCH_START
+       .byte 0x48; fxrstor     (%rdi)          /* really fxrstor64 */
+       CODEPATCH_END(CPTAG_XRSTOR)
+       movq    $T_PROTFLT,TF_TRAPNO(%rsp)
+       jmp     recall_trap
+
+#ifdef DIAGNOSTIC
+.Lintr_user_exit_not_blocked:
+       movl    warn_once(%rip),%edi
+       testl   %edi,%edi
+       jnz     1f
+       incl    %edi
+       movl    %edi,warn_once(%rip)
+       leaq    .Lnot_blocked(%rip),%rdi
+       call    _C_LABEL(printf)
+#ifdef DDB
+       int     $3
+#endif /* DDB */
+1:     cli
+       jmp     intr_user_exit
+
+.Luser_spl_not_lowered:
+       sti
+       leaq    intr_spl_lowered(%rip),%rdi
+       movl    CPUVAR(ILEVEL),%esi
+       xorl    %edx,%edx               /* always SPL zero for userspace */
+       xorl    %eax,%eax
+       call    _C_LABEL(printf)
+#ifdef DDB
+       int     $3
+#endif /* DDB */
+       movl    $0,CPUVAR(ILEVEL)
+       cli
+       jmp     intr_user_exit
+
+       .section .rodata
+intr_spl_lowered:
+       .asciz  "WARNING: SPL NOT LOWERED ON TRAP EXIT %x %x\n"
+       .text
+#endif /* DIAGNOSTIC */
+
+
+/*
+ * Return to supervisor mode from trap or interrupt
+ */
+NENTRY(intr_fast_exit)
+#ifdef DIAGNOSTIC
+       pushfq
+       popq    %rdx
+       testq   $PSL_I,%rdx
+       jnz     .Lintr_exit_not_blocked
+#endif /* DIAGNOSTIC */
+       movq    TF_RDI(%rsp),%rdi
+       movq    TF_RSI(%rsp),%rsi
+       movq    TF_R8(%rsp),%r8
+       movq    TF_R9(%rsp),%r9
+       movq    TF_R10(%rsp),%r10
+       movq    TF_R12(%rsp),%r12
+       movq    TF_R13(%rsp),%r13
+       movq    TF_R14(%rsp),%r14
+       movq    TF_R15(%rsp),%r15
+       movq    TF_RBP(%rsp),%rbp
+       movq    TF_RBX(%rsp),%rbx
       movq    TF_RDX(%rsp),%rdx
       movq    TF_RCX(%rsp),%rcx
       movq    TF_R11(%rsp),%r11
@@ -813,7 +973,6 @@ NENTRY(intr_exit_recurse)

#ifdef DIAGNOSTIC
.Lintr_exit_not_blocked:
-       xchgw   %bx, %bx
       movl    warn_once(%rip),%edi
       testl   %edi,%edi
       jnz     1f
@@ -837,18 +996,71 @@ warn_once:
       .text
#endif

+/*
+ * FPU/"extended CPU state" handling
+ *     int xrstor_user(sfp, mask)
+ *             load given state, returns 0/1 if okay/it trapped
+ *     void fpusave(sfp)
+ *             save current state, but retain it in the FPU
+ *     void fpusavereset(sfp)
+ *             save current state and reset FPU to initial/kernel state
+ */
+
ENTRY(xrstor_user)
       movq    %rsi, %rdx
       movl    %esi, %eax
       shrq    $32, %rdx
       .globl  xrstor_fault
xrstor_fault:
-       xrstor  (%rdi)
+       CODEPATCH_START
+       .byte 0x48; fxrstor     (%rdi)          /* really fxrstor64 */
+       CODEPATCH_END(CPTAG_XRSTOR)
       xorl    %eax, %eax
       ret
-ENTRY(xrstor_resume)
+NENTRY(xrstor_resume)
       movl    $1, %eax
       ret
+END(xrstor_user)
+
+ENTRY(fpusave)
+       movq    xsave_mask(%rip),%rdx
+       movl    %edx,%eax
+       shrq    $32,%rdx
+       CODEPATCH_START
+       .byte 0x48; fxsave      (%rdi)          /* really fxsave64 */
+       CODEPATCH_END(CPTAG_XSAVE)
+       ret
+END(fpusave)
+
+ENTRY(fpusavereset)
+       movq    xsave_mask(%rip),%rdx
+       movl    %edx,%eax
+       shrq    $32,%rdx
+       CODEPATCH_START
+       .byte 0x48; fxsave      (%rdi)          /* really fxsave64 */
+       CODEPATCH_END(CPTAG_XSAVE)
+       movq    proc0paddr(%rip),%rdi
+#if PCB_SAVEFPU != 0
+       addq    $PCB_SAVEFPU,%rdi
+#endif
+       CODEPATCH_START
+       .byte 0x48; fxrstor     (%rdi)          /* really fxrstor64 */
+       CODEPATCH_END(CPTAG_XRSTOR)
+       ret
+END(fpusavereset)
+
+       .section .rodata
+       .globl  _C_LABEL(_xrstor)
+_C_LABEL(_xrstor):
+       .byte 0x48; xrstor      (%rdi)          /* really xrstor64 */
+
+       .globl  _C_LABEL(_xsave)
+_C_LABEL(_xsave):
+       .byte 0x48; xsave       (%rdi)          /* really xsave64 */
+
+       .globl  _C_LABEL(_xsaveopt)
+_C_LABEL(_xsaveopt):
+       .byte 0x48; xsaveopt    (%rdi)          /* really xsaveopt64 */

ENTRY(pagezero)
       movq    $-PAGE_SIZE,%rdx
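
The codepatch trick above depends on all of these being 4-byte
instructions: ".byte 0x48" hand-assembles the REX.W prefix that turns
fxsave/fxrstor into fxsave64/fxrstor64, and replacexsave() later
overwrites those bytes in place with the xsave64/xrstor64/xsaveopt64
encodings.  The encodings below are derived from the 0F AE group rules
(sketch; verify against the SDM):

/* all five are REX.W + 0F AE + ModRM(%rdi), differing only in /reg */
static const unsigned char fxsave64_rdi[]   = { 0x48, 0x0f, 0xae, 0x07 }; /* /0 */
static const unsigned char fxrstor64_rdi[]  = { 0x48, 0x0f, 0xae, 0x0f }; /* /1 */
static const unsigned char xsave64_rdi[]    = { 0x48, 0x0f, 0xae, 0x27 }; /* /4 */
static const unsigned char xrstor64_rdi[]   = { 0x48, 0x0f, 0xae, 0x2f }; /* /5 */
static const unsigned char xsaveopt64_rdi[] = { 0x48, 0x0f, 0xae, 0x37 }; /* /6 */
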
Index: sys/arch/amd64/amd64/locore0.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/locore0.S,v
retrieving revision 1.2.2.1
diff -u -p -r1.2.2.1 locore0.S
--- sys/arch/amd64/amd64/locore0.S      26 Feb 2018 12:29:48 -0000      1.2.2.1
+++ sys/arch/amd64/amd64/locore0.S      21 Jun 2018 11:54:01 -0000
@@ -601,7 +601,7 @@ write_efer:
        * 4. Enable paging and the rest of it.
        */
       movl    %cr0,%eax
-       orl     $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP),%eax
+       orl     $CR0_DEFAULT,%eax
       movl    %eax,%cr0
       jmp     compat
compat:
Index: sys/arch/amd64/amd64/machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.231.2.1
diff -u -p -r1.231.2.1 machdep.c
--- sys/arch/amd64/amd64/machdep.c      26 Feb 2018 12:29:48 -0000      1.231.2.1
+++ sys/arch/amd64/amd64/machdep.c      21 Jun 2018 11:54:01 -0000
@@ -395,7 +395,6 @@ x86_64_proc0_tss_ldt_init(void)
       struct pcb *pcb;

       cpu_info_primary.ci_curpcb = pcb = &proc0.p_addr->u_pcb;
-       pcb->pcb_cr0 = rcr0();
       pcb->pcb_fsbase = 0;
       pcb->pcb_kstack = (u_int64_t)proc0.p_addr + USPACE - 16;
       proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_kstack - 1;
@@ -404,20 +403,6 @@ x86_64_proc0_tss_ldt_init(void)
       lldt(0);
}

-/*
- * Set up TSS for a new PCB.
- */
-
-#ifdef MULTIPROCESSOR
-void
-x86_64_init_pcb_tss_ldt(struct cpu_info *ci)
-{
-       struct pcb *pcb = ci->ci_idle_pcb;
-
-       pcb->pcb_cr0 = rcr0();
-}
-#endif /* MULTIPROCESSOR */
-
bios_diskinfo_t *
bios_getdiskinfo(dev_t dev)
{
@@ -579,6 +564,7 @@ sendsig(sig_t catcher, int sig, int mask
       struct trapframe *tf = p->p_md.md_regs;
       struct sigacts *psp = p->p_p->ps_sigacts;
       struct sigcontext ksc;
+       struct savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu;
       siginfo_t ksi;
       register_t sp, scp, sip;
       u_long sss;
@@ -597,17 +583,19 @@ sendsig(sig_t catcher, int sig, int mask
       sp &= ~15ULL;   /* just in case */
       sss = (sizeof(ksc) + 15) & ~15;

-       if (p->p_md.md_flags & MDP_USEDFPU) {
-               fpusave_proc(p, 1);
-               sp -= fpu_save_len;
-               ksc.sc_fpstate = (struct fxsave64 *)sp;
-               if (copyout(&p->p_addr->u_pcb.pcb_savefpu.fp_fxsave,
-                   (void *)sp, fpu_save_len))
-                       sigexit(p, SIGILL);
+       /* Save FPU state to PCB if necessary, then copy it out */
+       if (curcpu()->ci_flags & CPUF_USERXSTATE) {
+               curcpu()->ci_flags &= ~CPUF_USERXSTATE;
+               fpusavereset(&p->p_addr->u_pcb.pcb_savefpu);
+       }
+       sp -= fpu_save_len;
+       ksc.sc_fpstate = (struct fxsave64 *)sp;
+       if (copyout(sfp, (void *)sp, fpu_save_len))
+               sigexit(p, SIGILL);

-               /* Signal handlers get a completely clean FP state */
-               p->p_md.md_flags &= ~MDP_USEDFPU;
-       }
+       /* Now reset the FPU state in PCB */
+       memcpy(&p->p_addr->u_pcb.pcb_savefpu,
+           &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len);

       sip = 0;
       if (psp->ps_siginfo & sigmask(sig)) {
@@ -637,6 +625,9 @@ sendsig(sig_t catcher, int sig, int mask
       tf->tf_rflags &= ~(PSL_T|PSL_D|PSL_VM|PSL_AC);
       tf->tf_rsp = scp;
       tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
+
+       /* The reset state _is_ the userspace state for this thread now */
+       curcpu()->ci_flags |= CPUF_USERXSTATE;
}

/*
@@ -681,16 +672,23 @@ sys_sigreturn(struct proc *p, void *v, r
           !USERMODE(ksc.sc_cs, ksc.sc_eflags))
               return (EINVAL);

-       if (p->p_md.md_flags & MDP_USEDFPU)
-               fpusave_proc(p, 0);
+       /* Current state is obsolete; toss it and force a reload */
+       if (curcpu()->ci_flags & CPUF_USERXSTATE) {
+               curcpu()->ci_flags &= ~CPUF_USERXSTATE;
+               fpureset();
+       }

-       if (ksc.sc_fpstate) {
+       /* Copy in the FPU state to restore */
+       if (__predict_true(ksc.sc_fpstate != NULL)) {
               struct fxsave64 *fx = &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave;

               if ((error = copyin(ksc.sc_fpstate, fx, fpu_save_len)))
                       return (error);
               fx->fx_mxcsr &= fpu_mxcsr_mask;
-               p->p_md.md_flags |= MDP_USEDFPU;
+       } else {
+               /* shouldn't happen, but handle it */
+               memcpy(&p->p_addr->u_pcb.pcb_savefpu,
+                   &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len);
       }

       ksc.sc_trapno = tf->tf_trapno;
@@ -707,6 +705,7 @@ sys_sigreturn(struct proc *p, void *v, r
        * when a signal was being delivered, the process will be
        * completely restored, including the userland %rcx and %r11
        * registers which the 'sysretq' instruction cannot restore.
+        * Also need to make sure we can handle faulting on xrstor.
        */
       p->p_md.md_flags |= MDP_IRET;

@@ -1092,10 +1091,19 @@ setregs(struct proc *p, struct exec_pack
{
       struct trapframe *tf;

-       /* If we were using the FPU, forget about it. */
-       if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
-               fpusave_proc(p, 0);
-       p->p_md.md_flags &= ~MDP_USEDFPU;
+       /* Reset FPU state in PCB */
+       memcpy(&p->p_addr->u_pcb.pcb_savefpu,
+           &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len);
+
+       if (curcpu()->ci_flags & CPUF_USERXSTATE) {
+               /* state in CPU is obsolete; reset it */
+               fpureset();
+       } else {
+               /* the reset state _is_ the userspace state now */
+               curcpu()->ci_flags |= CPUF_USERXSTATE;
+       }
+
+       /* To reset all registers we have to return via iretq */
       p->p_md.md_flags |= MDP_IRET;

       reset_segs();
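
sendsig(), sys_sigreturn()'s fallback, and setregs() above all reset a
thread's FPU image the same way: copy proc0's pristine save area over the
PCB.  A hypothetical helper capturing the idiom (not in this patch):

static void
fpu_reset_pcb_state(struct proc *p)
{
	/* proc0's save area was initialized clean in cpu_init() */
	memcpy(&p->p_addr->u_pcb.pcb_savefpu,
	    &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len);
}
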
Index: sys/arch/amd64/amd64/mptramp.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/mptramp.S,v
retrieving revision 1.15
diff -u -p -r1.15 mptramp.S
--- sys/arch/amd64/amd64/mptramp.S      29 Jun 2017 08:14:36 -0000      1.15
+++ sys/arch/amd64/amd64/mptramp.S      21 Jun 2018 11:54:01 -0000
@@ -120,7 +120,7 @@ _C_LABEL(cpu_spinup_trampoline):
       movw    %ax, %ss
       addr32 lgdtl (.Lmptramp_gdt32_desc)   # load flat descriptor table
       movl    %cr0, %eax       # get cr0
-       orl     $0x1, %eax      # enable protected mode
+       orl     $CR0_PE, %eax   # enable protected mode
       movl    %eax, %cr0      # doit
       ljmpl   $0x8, $.Lmp_startup

@@ -179,7 +179,7 @@ _TRMP_LABEL(.Lmp_startup)
       movl    $.Lmptramp_jmp64,%eax

       movl    %cr0,%ecx               # get control word
-       orl     $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP),%ecx
+       orl     $CR0_DEFAULT,%ecx
       movl    %ecx, %cr0

       ljmp    *(%eax)
@@ -230,7 +230,7 @@ _C_LABEL(cpu_spinup_trampoline_end):        #en
       /* Switch address space. */
       movq    PCB_CR3(%rsi),%rax
       movq    %rax,%cr3
-       movl    PCB_CR0(%rsi),%eax
+       movl    $CR0_DEFAULT,%eax
       movq    %rax,%cr0
       call    _C_LABEL(cpu_hatch)
       /* NOTREACHED */
Index: sys/arch/amd64/amd64/process_machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/process_machdep.c,v
retrieving revision 1.14
diff -u -p -r1.14 process_machdep.c
--- sys/arch/amd64/amd64/process_machdep.c      28 Jun 2015 18:54:54 -0000      1.14
+++ sys/arch/amd64/amd64/process_machdep.c      21 Jun 2018 11:54:01 -0000
@@ -127,19 +127,6 @@ process_read_fpregs(struct proc *p, stru
{
       struct fxsave64 *frame = process_fpframe(p);

-       if (p->p_md.md_flags & MDP_USEDFPU) {
-               fpusave_proc(p, 1);
-       } else {
-               /* Fake a FNINIT. */
-               memset(frame, 0, sizeof(*regs));
-               frame->fx_fcw = __INITIAL_NPXCW__;
-               frame->fx_fsw = 0x0000;
-               frame->fx_ftw = 0x00;
-               frame->fx_mxcsr = __INITIAL_MXCSR__;
-               frame->fx_mxcsr_mask = fpu_mxcsr_mask;
-               p->p_md.md_flags |= MDP_USEDFPU;
-       }
-
       memcpy(&regs->fxstate, frame, sizeof(*regs));
       return (0);
}
@@ -189,14 +176,11 @@ process_write_fpregs(struct proc *p, str
{
       struct fxsave64 *frame = process_fpframe(p);

-       if (p->p_md.md_flags & MDP_USEDFPU) {
-               fpusave_proc(p, 0);
-       } else {
-               p->p_md.md_flags |= MDP_USEDFPU;
-       }
-
       memcpy(frame, &regs->fxstate, sizeof(*regs));
       frame->fx_mxcsr &= fpu_mxcsr_mask;
+
+       /* force target to return via iretq so bogus xstate can be handled */
+       p->p_md.md_flags |= MDP_IRET;
       return (0);
}

Index: sys/arch/amd64/amd64/spl.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/spl.S,v
retrieving revision 1.11.4.1
diff -u -p -r1.11.4.1 spl.S
--- sys/arch/amd64/amd64/spl.S  26 Feb 2018 12:29:48 -0000      1.11.4.1
+++ sys/arch/amd64/amd64/spl.S  21 Jun 2018 11:54:01 -0000
@@ -158,18 +158,6 @@ KIDTVEC(doreti)
       jmp     *IS_RESUME(%rax)
2:     /* Check for ASTs on exit to user mode. */
       movl    %ebx,CPUVAR(ILEVEL)
-5:     CHECK_ASTPENDING(%r11)
-       je      3f
-       testb   $SEL_RPL,TF_CS(%rsp)
-       jz      3f
-4:     CLEAR_ASTPENDING(%r11)
-       sti
-       movq    %rsp, %rdi
-       call    _C_LABEL(ast)
-       cli
-       jmp     5b
-3:
-#ifdef DIAGNOSTIC
-       movl    $254,%esi
-#endif /* DIAGNOSTIC */
+       testb   $SEL_RPL,TF_CS(%rsp)
+       jnz     intr_user_exit
       INTRFASTEXIT
Index: sys/arch/amd64/amd64/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/trap.c,v
retrieving revision 1.61.2.1
diff -u -p -r1.61.2.1 trap.c
--- sys/arch/amd64/amd64/trap.c 26 Feb 2018 12:29:48 -0000      1.61.2.1
+++ sys/arch/amd64/amd64/trap.c 21 Jun 2018 11:54:01 -0000
@@ -97,7 +97,7 @@ void trap(struct trapframe *);
void ast(struct trapframe *);
void syscall(struct trapframe *);

-const char *trap_type[] = {
+const char * const trap_type[] = {
       "privileged instruction fault",         /*  0 T_PRIVINFLT */
       "breakpoint trap",                      /*  1 T_BPTFLT */
       "arithmetic trap",                      /*  2 T_ARITHTRAP */
@@ -119,17 +119,18 @@ const char *trap_type[] = {
       "machine check",                        /* 18 T_MCA */
       "SSE FP exception",                     /* 19 T_XMM */
};
-int    trap_types = nitems(trap_type);
+const int      trap_types = nitems(trap_type);

#ifdef DEBUG
int    trapdebug = 0;
#endif

-#define        IDTVEC(name)    __CONCAT(X, name)
+static inline void frame_dump(struct trapframe *_tf, struct proc *_p,
+    const char *_sig, uint64_t _cr2);
+static inline void verify_smap(const char *_func);
+static inline void debug_trap(struct trapframe *_frame, struct proc *_p,
+    long _type);

-#ifdef TRAP_SIGDEBUG
-static void frame_dump(struct trapframe *);
-#endif

/*
 * trap(frame):
@@ -144,38 +145,17 @@ trap(struct trapframe *frame)
       struct proc *p = curproc;
       int type = (int)frame->tf_trapno;
       struct pcb *pcb;
-       extern char doreti_iret[], resume_iret[];
-       extern char xrstor_fault[], xrstor_resume[];
       caddr_t onfault;
       int error;
       uint64_t cr2;
       union sigval sv;

+       verify_smap(__func__);
       uvmexp.traps++;
+       debug_trap(frame, p, type);

       pcb = (p != NULL && p->p_addr != NULL) ? &p->p_addr->u_pcb : NULL;

-#ifdef DEBUG
-       if (trapdebug) {
-               printf("trap %d code %llx rip %llx cs %llx rflags %llx "
-                      "cr2 %llx cpl %x\n",
-                   type, frame->tf_err, frame->tf_rip, frame->tf_cs,
-                   frame->tf_rflags, rcr2(), curcpu()->ci_ilevel);
-               printf("curproc %p\n", (void *)p);
-               if (p != NULL)
-                       printf("pid %d\n", p->p_p->ps_pid);
-       }
-#endif
-#ifdef DIAGNOSTIC
-       if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) {
-               u_long rf = read_rflags();
-               if (rf & PSL_AC) {
-                       write_rflags(rf & ~PSL_AC);
-                       panic("%s: AC set on entry", "trap");
-               }
-       }
-#endif
-
       if (!KERNELMODE(frame->tf_cs, frame->tf_rflags)) {
               type |= T_USER;
               p->p_md.md_regs = frame;
@@ -205,27 +185,6 @@ trap(struct trapframe *frame)
               /*NOTREACHED*/

       case T_PROTFLT:
-               /*
-                * Check for xrstor faulting because of invalid xstate
-                * We do this by looking at the address of the
-                * instruction that faulted.
-                */
-               if (frame->tf_rip == (u_int64_t)xrstor_fault && p != NULL) {
-                       frame->tf_rip = (u_int64_t)xrstor_resume;
-                       return;
-               }
-
-               /*
-                * Check for failure during return to user mode.
-                * We do this by looking at the address of the
-                * instruction that faulted.
-                */
-               if (frame->tf_rip == (u_int64_t)doreti_iret) {
-                       frame->tf_rip = (u_int64_t)resume_iret;
-                       return;
-               }
-               /* FALLTHROUGH */
-
       case T_SEGNPFLT:
       case T_ALIGNFLT:
       case T_TSSFLT:
@@ -243,12 +202,7 @@ copyfault:
       case T_TSSFLT|T_USER:
       case T_SEGNPFLT|T_USER:
       case T_STKFLT|T_USER:
-#ifdef TRAP_SIGDEBUG
-               printf("pid %d (%s): %s at rip %llx addr %llx\n",
-                   p->p_p->ps_pid, p->p_p->ps_comm, "BUS",
-                   frame->tf_rip, rcr2());
-               frame_dump(frame);
-#endif
+               frame_dump(frame, p, "BUS", 0);
               sv.sival_ptr = (void *)frame->tf_rip;
               KERNEL_LOCK();
               trapsignal(p, SIGBUS, type & ~T_USER, BUS_OBJERR, sv);
@@ -267,30 +221,11 @@ copyfault:
               trapsignal(p, SIGILL, type & ~T_USER, ILL_PRVOPC, sv);
               KERNEL_UNLOCK();
               goto out;
-       case T_FPOPFLT|T_USER:          /* coprocessor operand fault */
-#ifdef TRAP_SIGDEBUG
-               printf("pid %d (%s): %s at rip %llx addr %llx\n",
-                   p->p_p->ps_pid, p->p_p->ps_comm, "ILL",
-                   frame->tf_rip, rcr2());
-               frame_dump(frame);
-#endif
-               sv.sival_ptr = (void *)frame->tf_rip;
-               KERNEL_LOCK();
-               trapsignal(p, SIGILL, type & ~T_USER, ILL_COPROC, sv);
-               KERNEL_UNLOCK();
-               goto out;
+       case T_FPOPFLT|T_USER:          /* impossible without 32bit compat */
       case T_BOUND|T_USER:
-               sv.sival_ptr = (void *)frame->tf_rip;
-               KERNEL_LOCK();
-               trapsignal(p, SIGFPE, type &~ T_USER, FPE_FLTSUB, sv);
-               KERNEL_UNLOCK();
-               goto out;
       case T_OFLOW|T_USER:
-               sv.sival_ptr = (void *)frame->tf_rip;
-               KERNEL_LOCK();
-               trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTOVF, sv);
-               KERNEL_UNLOCK();
-               goto out;
+       case T_DNA|T_USER:
+               panic("impossible trap");
       case T_DIVIDE|T_USER:
               sv.sival_ptr = (void *)frame->tf_rip;
               KERNEL_LOCK();
@@ -401,18 +336,13 @@ faultcommon:
                           p->p_ucred ? (int)p->p_ucred->cr_uid : -1);
                       signal = SIGKILL;
               } else {
-#ifdef TRAP_SIGDEBUG
-                       printf("pid %d (%s): %s at rip %llx addr %llx\n",
-                           p->p_p->ps_pid, p->p_p->ps_comm, "SEGV",
-                           frame->tf_rip, rcr2());
-                       frame_dump(frame);
-#endif
-               }
-               if (error == EACCES)
-                       sicode = SEGV_ACCERR;
-               if (error == EIO) {
-                       signal = SIGBUS;
-                       sicode = BUS_OBJERR;
+                       frame_dump(frame, p, "SEGV", cr2);
+                       if (error == EACCES)
+                               sicode = SEGV_ACCERR;
+                       else if (error == EIO) {
+                               signal = SIGBUS;
+                               sicode = BUS_OBJERR;
+                       }
               }
               sv.sival_ptr = (void *)fa;
               trapsignal(p, signal, T_PAGEFLT, sicode, sv);
@@ -455,10 +385,12 @@ out:
       userret(p);
}

-#ifdef TRAP_SIGDEBUG
-static void
-frame_dump(struct trapframe *tf)
+static inline void
+frame_dump(struct trapframe *tf, struct proc *p, const char *sig, uint64_t cr2)
{
+#ifdef TRAP_SIGDEBUG
+       printf("pid %d (%s): %s at rip %llx addr %llx\n",
+           p->p_p->ps_pid, p->p_p->ps_comm, sig, tf->tf_rip, cr2);
       printf("rip %p  cs 0x%x  rfl %p  rsp %p  ss 0x%x\n",
           (void *)tf->tf_rip, (unsigned)tf->tf_cs & 0xffff,
           (void *)tf->tf_rflags,
@@ -475,8 +407,38 @@ frame_dump(struct trapframe *tf)
           (void *)tf->tf_r13, (void *)tf->tf_r14, (void *)tf->tf_r15);
       printf("rbp %p  rbx %p  rax %p\n",
           (void *)tf->tf_rbp, (void *)tf->tf_rbx, (void *)tf->tf_rax);
+#endif
}
+
+static inline void
+verify_smap(const char *func)
+{
+#ifdef DIAGNOSTIC
+       if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) {
+               u_long rf = read_rflags();
+               if (rf & PSL_AC) {
+                       write_rflags(rf & ~PSL_AC);
+                       panic("%s: AC set on entry", func);
+               }
+       }
#endif
+}
+
+static inline void
+debug_trap(struct trapframe *frame, struct proc *p, long type)
+{
+#ifdef DEBUG
+       if (trapdebug) {
+               printf("trap %ld code %llx rip %llx cs %llx rflags %llx "
+                      "cr2 %llx cpl %x\n",
+                   type, frame->tf_err, frame->tf_rip, frame->tf_cs,
+                   frame->tf_rflags, rcr2(), curcpu()->ci_ilevel);
+               printf("curproc %p\n", (void *)p);
+               if (p != NULL)
+                       printf("pid %d\n", p->p_p->ps_pid);
+       }
+#endif
+}


/*
@@ -514,16 +476,7 @@ syscall(struct trapframe *frame)
       size_t argsize, argoff;
       register_t code, args[9], rval[2], *argp;

-#ifdef DIAGNOSTIC
-       if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) {
-               u_long rf = read_rflags();
-               if (rf & PSL_AC) {
-                       write_rflags(rf & ~PSL_AC);
-                       panic("%s: AC set on entry", "syscall");
-               }
-       }
-#endif
-
+       verify_smap(__func__);
       uvmexp.syscalls++;
       p = curproc;

Index: sys/arch/amd64/amd64/vector.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vector.S,v
retrieving revision 1.51.2.2
diff -u -p -r1.51.2.2 vector.S
--- sys/arch/amd64/amd64/vector.S       28 Feb 2018 17:01:34 -0000      1.51.2.2
+++ sys/arch/amd64/amd64/vector.S       21 Jun 2018 11:54:01 -0000
@@ -179,17 +179,7 @@ IDTVEC(trap05)
IDTVEC(trap06)
       ZTRAP(T_PRIVINFLT)
IDTVEC(trap07)
-       pushq   $0                      # dummy error code
-       pushq   $T_DNA
-       INTRENTRY(trap07)
-       sti
-       cld
-       SMAP_CLAC
-       movq    CPUVAR(SELF),%rdi
-       movq    %rsp, %rsi
-       call    _C_LABEL(fpudna)
-       cli
-       INTRFASTEXIT
+       ZTRAP(T_DNA)            # impossible: we don't do lazy FPU
IDTVEC(trap08)
       pushq   $T_DOUBLEFLT
       jmp     calltrap_specstk
@@ -202,59 +192,47 @@ IDTVEC(trap0b)
IDTVEC(trap0c)
       TRAP(T_STKFLT)

-       /*
-        * If iretq faults, we'll get a trap at doreti_iret with CPL==0 but
-        * the user's GS.base, which INTRENTRY wouldn't handle correctly
-        * (it would skip the swapgs), so locally expand both it and
-        * INTR_SAVE_GPRS, but add an extra test comparing %rip to doreti_iret
-        * so that we can do the necessary swapgs in that case.
-        */
+/*
+ * The #GP (general protection fault) handler has a couple weird cases
+ * to handle:
+ *  - trapping in iretq to userspace and
+ *  - trapping in xrstor in the kernel.
+ * We detect both of these by examining the %rip in the iretq_frame.
+ * Handling them is done by updating %rip in the iretq_frame to point
+ * to a stub handler of some sort and then iretq'ing to it.  For the
+ * iretq fault we resume in a stub which acts like we got a fresh #GP.
+ * For the xrstor fault we resume to a stub which returns an error to
+ * the routine that requested the xrstor.
+ */
IDTVEC(trap0d)
+       pushq   %rdx
       pushq   %rcx
-       leaq    _C_LABEL(doreti_iret)(%rip),%rcx
-       cmpq    %rcx,16(%rsp)           /* over %rcx and err to %rip */
+       movq    24(%rsp),%rdx           /* over %r[cd]x and err to %rip */
+       leaq    doreti_iret(%rip),%rcx
+       cmpq    %rcx,%rdx
+       je      .Lhandle_doreti
+       leaq    xrstor_fault(%rip),%rcx
+       cmpq    %rcx,%rdx
+       je      .Lhandle_xrstor
       popq    %rcx
-       je      1f
-       testq   $SEL_RPL,16(%rsp)       /* over err and %rip to %cs */
-       je      INTRENTRY_LABEL(trap0d)
-1:     swapgs
-       movq    %rax,CPUVAR(SCRATCH)
-       movq    CPUVAR(KERN_CR3),%rax
-       testq   %rax,%rax
-       jz      98f
-       movq    %rax,%cr3
-       jmp     98f
-       .text
-       .globl  INTRENTRY_LABEL(trap0d)
-INTRENTRY_LABEL(trap0d):       /* from kernel */
-       pushq   $T_PROTFLT
-       subq    $152,%rsp
-       movq    %rcx,TF_RCX(%rsp)
-       jmp     99f
-98:    /* from userspace */
-       movq    CPUVAR(KERN_RSP),%rax
-       xchgq   %rax,%rsp
-       movq    %rcx,TF_RCX(%rsp)
-       /* set trapno in the trap frame */
-       movq    $T_PROTFLT,TF_TRAPNO(%rsp)
-       /* copy err and iretq frame to the trap frame */
-       movq    0(%rax),%rcx
-       movq    %rcx,TF_ERR(%rsp)
-       add     $8,%rax
-       movq    IRETQ_RIP(%rax),%rcx
-       movq    %rcx,TF_RIP(%rsp)
-       movq    IRETQ_CS(%rax),%rcx
-       movq    %rcx,TF_CS(%rsp)
-       movq    IRETQ_RFLAGS(%rax),%rcx
-       movq    %rcx,TF_RFLAGS(%rsp)
-       movq    IRETQ_RSP(%rax),%rcx
-       movq    %rcx,TF_RSP(%rsp)
-       movq    IRETQ_SS(%rax),%rcx
-       movq    %rcx,TF_SS(%rsp)
-       movq    CPUVAR(SCRATCH),%rax
-99:    INTR_SAVE_MOST_GPRS_NO_ADJ
-       sti
-       jmp     calltrap
+       popq    %rdx
+       TRAP(T_PROTFLT)
+
+.Lhandle_xrstor:
+       /* xrstor faulted; just resume in xrstor_resume */
+       leaq    xrstor_resume(%rip),%rcx
+       jmp     1f
+
+.Lhandle_doreti:
+       /* iretq faulted; resume in a stub that acts like we got a #GP */
+       leaq    .Lhandle_doreti_resume(%rip),%rcx
+1:     movq    %rcx,24(%rsp)           /* over %r[cd]x and err to %rip */
+       popq    %rcx
+       popq    %rdx
+       addq    $8,%rsp                 /* pop the err code */
+       jmp     doreti_iret
+.Lhandle_doreti_resume:
+       ZTRAP(T_PROTFLT)

IDTVEC(trap0e)
       TRAP(T_PAGEFLT)
@@ -305,55 +283,12 @@ Xexceptions:
       .quad   _C_LABEL(Xtrap1e), _C_LABEL(Xtrap1f)

/*
- * If an error is detected during trap, syscall, or interrupt exit, trap() will
- * change %rip to point to this label.  At that point, we'll be running with
- * the kernel GS.base, but the trap frame will be from CPL==3, so we can't
- * go through INTRENTRY as it would do the swapgs that we don't want/need.
- * So, locally expand INTRENTRY but without the swapgs: manually
- * clean up the stack and resume as if we were handling a general
- * protection fault.  This will cause the process to get a SIGBUS.
- */
-NENTRY(resume_iret)
-       movq    %rax,CPUVAR(SCRATCH)
-       movq    CPUVAR(KERN_CR3),%rax
-       testq   %rax,%rax
-       jz      INTRENTRY_LABEL(iret)
-       movq    %rax,%cr3
-       jmp     INTRENTRY_LABEL(iret)
-       .text
-       .globl  INTRENTRY_LABEL(iret)
-INTRENTRY_LABEL(iret): /* from kernel */
-       movq    CPUVAR(KERN_RSP),%rax
-       xchgq   %rax,%rsp
-       movq    %rcx,TF_RCX(%rsp)
-       /* set trapno+err in the trap frame */
-       movq    $T_PROTFLT,TF_TRAPNO(%rsp)
-       movq    $0,TF_ERR(%rsp)
-       /* copy iretq frame to the trap frame */
-       movq    IRETQ_RIP(%rax),%rcx
-       movq    %rcx,TF_RIP(%rsp)
-       movq    IRETQ_CS(%rax),%rcx
-       movq    %rcx,TF_CS(%rsp)
-       movq    IRETQ_RFLAGS(%rax),%rcx
-       movq    %rcx,TF_RFLAGS(%rsp)
-       movq    IRETQ_RSP(%rax),%rcx
-       movq    %rcx,TF_RSP(%rsp)
-       movq    IRETQ_SS(%rax),%rcx
-       movq    %rcx,TF_SS(%rsp)
-       movq    CPUVAR(SCRATCH),%rax
-       INTR_SAVE_MOST_GPRS_NO_ADJ
-       sti
-       jmp     calltrap
-
-
-/*
 * All traps go through here. Call the generic trap handler, and
 * check for ASTs afterwards.
 */
KUENTRY(alltraps)
       INTRENTRY(alltraps)
       sti
-calltrap:
       cld
       SMAP_CLAC
#ifdef DIAGNOSTIC
@@ -376,19 +311,14 @@ calltrap:
       jz      2f
.Lreal_trap:
#endif /* !defined(GPROF) && defined(DDBPROF) */
+       .globl  recall_trap
+recall_trap:
       movq    %rsp, %rdi
       call    _C_LABEL(trap)
2:     /* Check for ASTs on exit to user mode. */
       cli
-       CHECK_ASTPENDING(%r11)
-       je      1f
       testb   $SEL_RPL,TF_CS(%rsp)
-       jz      1f
-5:     CLEAR_ASTPENDING(%r11)
-       sti
-       movq    %rsp, %rdi
-       call    _C_LABEL(ast)
-       jmp     2b
+       jnz     intr_user_exit
#ifndef DIAGNOSTIC
1:     INTRFASTEXIT
#else /* DIAGNOSTIC */
@@ -396,7 +326,7 @@ calltrap:
       jne     3f
       INTRFASTEXIT
3:     sti
-       movabsq $spl_lowered,%rdi
+       leaq    spl_lowered(%rip),%rdi
       movl    CPUVAR(ILEVEL),%esi
       movl    %ebx,%edx
       xorq    %rax,%rax
@@ -601,7 +531,6 @@ KIDTVEC(resume_xen_upcall)
2:
       movq    $(1 << LIR_XEN),%rax
       orq     %rax,CPUVAR(IPENDING)
-3:
       INTRFASTEXIT
#endif /* NXEN > 0 */

@@ -636,7 +565,6 @@ KIDTVEC(resume_hyperv_upcall)
2:
       movq    $(1 << LIR_HYPERV),%rax
       orq     %rax,CPUVAR(IPENDING)
-3:
       INTRFASTEXIT
#endif /* NHYPERV > 0 */
#endif /* NLAPIC > 0 */
@@ -682,7 +610,7 @@ IDTVEC(intr_##name##num)                                            ;\
       SMAP_CLAC                                                       ;\
       incl    CPUVAR(IDEPTH)                                          ;\
       movq    IS_HANDLERS(%r14),%rbx                                  ;\
-6:                                                                     \
+6:     /* loop, walking chain of handlers */                           \
       movl    IH_LEVEL(%rbx),%r12d                                    ;\
       cmpl    %r13d,%r12d                                             ;\
       jle     7f                                                      ;\
@@ -693,6 +621,8 @@ IDTVEC(intr_##name##num)                                            ;\
       orl     %eax,%eax               /* should it be counted? */     ;\
       jz      4f                      /* no, skip it */               ;\
       incq    IH_COUNT(%rbx)          /* count the intrs */           ;\
+       cmpl    $2,%eax                 /* can't know if it was ours */ ;\
+       je      4f                      /* keep trying */               ;\
       cmpl    $0,_C_LABEL(intr_shared_edge)                           ;\
       jne     4f                      /* if no shared edges ... */    ;\
       orl     %eax,%eax               /* 1 means stop trying */       ;\
@@ -700,13 +630,13 @@ IDTVEC(intr_##name##num)                                          ;\
4:     movq    IH_NEXT(%rbx),%rbx      /* next handler in chain */     ;\
       testq   %rbx,%rbx                                               ;\
       jnz     6b                                                      ;\
-5:                                                                     \
+5:     /* successfully handled */                                      \
       cli                                                             ;\
       unmask(num)                     /* unmask it in hardware */     ;\
       late_ack(num)                                                   ;\
       sti                                                             ;\
       jmp     _C_LABEL(Xdoreti)       /* lower spl and do ASTs */     ;\
-7:                                                                     \
+7:     /* current IPL > handler's ih_level */                          \
       cli                                                             ;\
       movq    $(1 << num),%rax                                        ;\
       orq     %rax,CPUVAR(IPENDING)                                   ;\
@@ -714,16 +644,18 @@ IDTVEC(intr_##name##num)                                          ;\
       late_ack(num)                                                   ;\
       sti                                                             ;\
       jmp     _C_LABEL(Xdoreti)       /* lower spl and do ASTs */     ;\
-10:                                                                    \
+10:    /* currently masked */                                          \
       cli                                                             ;\
       movq    $(1 << num),%rax                                        ;\
       orq     %rax,CPUVAR(IPENDING)                                   ;\
       level_mask(num)                                                 ;\
       late_ack(num)                                                   ;\
       INTRFASTEXIT                                                    ;\
-9:                                                                     \
+9:     /* spurious interrupt */                                        \
       unmask(num)                                                     ;\
       late_ack(num)                                                   ;\
+       testb   $SEL_RPL,TF_CS(%rsp)                                    ;\
+       jnz     intr_user_exit                                          ;\
       INTRFASTEXIT

#define ICUADDR IO_ICU1
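
The trap0d redirect scheme above can be read as the following C-like
sketch (illustrative only; the assembly above is authoritative, and
raise_trap() is a made-up name standing in for the TRAP/ZTRAP paths):

        /* on #GP: peek at the %rip saved in the iretq_frame */
        if (iretq_frame->rip == (uint64_t)doreti_iret)
                iretq_frame->rip = (uint64_t)handle_doreti_resume;
        else if (iretq_frame->rip == (uint64_t)xrstor_fault)
                iretq_frame->rip = (uint64_t)xrstor_resume;
        else
                raise_trap(T_PROTFLT);  /* ordinary #GP */
        /* iretq; execution resumes in the chosen stub */

For the xrstor case this is what lets xrstor_user() (declared in fpu.h
below) return an error instead of crashing when guest-supplied xsave
state turns out to be malformed.
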
Index: sys/arch/amd64/amd64/via.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/via.c,v
retrieving revision 1.23
diff -u -p -r1.23 via.c
--- sys/arch/amd64/amd64/via.c  2 May 2017 11:47:49 -0000       1.23
+++ sys/arch/amd64/amd64/via.c  21 Jun 2018 11:54:01 -0000
@@ -317,18 +317,11 @@ static __inline void
viac3_cbc(void *cw, void *src, void *dst, void *key, int rep,
    void *iv)
{
-       unsigned int creg0;
-
-       creg0 = rcr0();         /* Permit access to SIMD/FPU path */
-       lcr0(creg0 & ~(CR0_EM|CR0_TS));
-
       /* Do the deed */
       __asm volatile("pushfq; popfq");
       __asm volatile("rep xcryptcbc" :
           : "b" (key), "a" (iv), "c" (rep), "d" (cw), "S" (src), "D" (dst)
           : "memory", "cc");
-
-       lcr0(creg0);
}

int
@@ -521,14 +514,8 @@ void
viac3_rnd(void *v)
{
       struct timeout *tmo = v;
-       unsigned int *p, i, rv, creg0, len = VIAC3_RNG_BUFSIZ;
+       unsigned int *p, i, rv, len = VIAC3_RNG_BUFSIZ;
       static int buffer[VIAC3_RNG_BUFSIZ + 2];        /* XXX why + 2? */
-#ifdef MULTIPROCESSOR
-       int s = splipi();
-#endif
-
-       creg0 = rcr0();         /* Permit access to SIMD/FPU path */
-       lcr0(creg0 & ~(CR0_EM|CR0_TS));

       /*
        * Here we collect the random data from the VIA C3 RNG.  We make
@@ -538,12 +525,6 @@ viac3_rnd(void *v)
       __asm volatile("rep xstorerng"
           : "=a" (rv) : "d" (3), "D" (buffer), "c" (len*sizeof(int))
           : "memory", "cc");
-
-       lcr0(creg0);
-
-#ifdef MULTIPROCESSOR
-       splx(s);
-#endif

       for (i = 0, p = buffer; i < VIAC3_RNG_BUFSIZ; i++, p++)
               add_true_randomness(*p);
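
The rcr0()/lcr0() bracketing deleted above existed only to clear
CR0_TS so the PadLock instructions would not raise a device-not-available
fault; with eager FPU switching the kernel never sets CR0_TS, so the
bracketing is dead code.  The xcrypt/xstorerng loops work on memory
operands and are understood not to touch the SIMD register file, so
nothing replaces it here.  Kernel code that does clobber SIMD registers
instead marks the region explicitly; a sketch of that convention, using
the pair declared in fpu.h below:

        fpu_kernel_enter();     /* take the FPU away from user state */
        /* ... code using the x87/SSE/AVX register file ... */
        fpu_kernel_exit();      /* user state reloaded on return */
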
Index: sys/arch/amd64/amd64/vm_machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vm_machdep.c,v
retrieving revision 1.40
diff -u -p -r1.40 vm_machdep.c
--- sys/arch/amd64/amd64/vm_machdep.c   12 Sep 2017 02:58:08 -0000      1.40
+++ sys/arch/amd64/amd64/vm_machdep.c   21 Jun 2018 11:54:01 -0000
@@ -73,19 +73,12 @@ cpu_fork(struct proc *p1, struct proc *p
    void (*func)(void *), void *arg)
{
       struct pcb *pcb = &p2->p_addr->u_pcb;
+       struct pcb *pcb1 = &p1->p_addr->u_pcb;
       struct trapframe *tf;
       struct switchframe *sf;

-       /*
-        * If fpuproc != p1, then the fpu h/w state is irrelevant and the
-        * state had better already be in the pcb.  This is true for forks
-        * but not for dumps.
-        *
-        * If fpuproc == p1, then we have to save the fpu h/w state to
-        * p1's pcb so that we can copy it.
-        */
-       if (p1->p_addr->u_pcb.pcb_fpcpu != NULL)
-               fpusave_proc(p1, 1);
+       /* Save the fpu h/w state to p1's pcb so that we can copy it. */
+       fpusave(&pcb1->pcb_savefpu);

       p2->p_md.md_flags = p1->p_md.md_flags;

@@ -93,7 +86,7 @@ cpu_fork(struct proc *p1, struct proc *p
       if (p1 != curproc && p1 != &proc0)
               panic("cpu_fork: curproc");
#endif
-       *pcb = p1->p_addr->u_pcb;
+       *pcb = *pcb1;

       /*
        * Activate the address space.
@@ -137,11 +130,6 @@ cpu_fork(struct proc *p1, struct proc *p
void
cpu_exit(struct proc *p)
{
-
-       /* If we were using the FPU, forget about it. */
-       if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
-               fpusave_proc(p, 0);
-
       pmap_deactivate(p);
       sched_exit(p);
}
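
Under eager switching, curproc's FPU contents are always live on the
local CPU while it runs, never parked on another CPU, which is why
cpu_fork() can snapshot them with a plain fpusave() and pass them on
through the ordinary pcb copy:

        fpusave(&pcb1->pcb_savefpu);    /* snapshot parent's live FPU state */
        *pcb = *pcb1;                   /* child starts from the same image */

and why cpu_exit() no longer has remote FPU state to chase down.
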
Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.170
diff -u -p -r1.170 vmm.c
--- sys/arch/amd64/amd64/vmm.c  8 Sep 2017 05:36:51 -0000       1.170
+++ sys/arch/amd64/amd64/vmm.c  21 Jun 2018 11:54:01 -0000
@@ -3584,39 +3584,67 @@ vcpu_must_stop(struct vcpu *vcpu)
}

/*
- * vmm_fpusave
+ * vmm_fpurestore
 *
- * Modified version of fpusave_cpu from fpu.c that only saves the FPU context
- * and does not call splipi/splx. Must be called with interrupts disabled.
+ * Restore the guest's FPU state, saving the existing userland thread's
+ * FPU context if necessary.  Must be called with interrupts disabled.
 */
-void
-vmm_fpusave(void)
+int
+vmm_fpurestore(struct vcpu *vcpu)
{
-       struct proc *p;
       struct cpu_info *ci = curcpu();

-       p = ci->ci_fpcurproc;
-       if (p == NULL)
-               return;
+       /* save vmmd's FPU state if we haven't already */
+       if (ci->ci_flags & CPUF_USERXSTATE) {
+               ci->ci_flags &= ~CPUF_USERXSTATE;
+               fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
+       }

-       if (ci->ci_fpsaving != 0)
-               panic("%s: recursive save!", __func__);
-       /*
-        * Set ci->ci_fpsaving, so that any pending exception will be
-        * thrown away.  (It will be caught again if/when the FPU
-        * state is restored.)
-        */
-       ci->ci_fpsaving = 1;
-       if (xsave_mask)
-               xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask);
-       else
-               fxsave(&p->p_addr->u_pcb.pcb_savefpu);
-       ci->ci_fpsaving = 0;
+       if (vcpu->vc_fpuinited) {
+               /* Restore guest XCR0 and FPU context */
+               if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
+                       DPRINTF("%s: guest attempted to set invalid %s\n",
+                           __func__, "bits in xcr0");
+                       return EINVAL;
+               }

-       p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
+               if (xrstor_user(&vcpu->vc_g_fpu, xsave_mask)) {
+                       DPRINTF("%s: guest attempted to set invalid %s\n",
+                           __func__, "xsave/xrstor state");
+                       return EINVAL;
+               }
+       }
+
+       if (xsave_mask) {
+               /* Restore guest %xcr0 */
+               xsetbv(0, vcpu->vc_gueststate.vg_xcr0);
+       }

-       p->p_addr->u_pcb.pcb_fpcpu = NULL;
-       ci->ci_fpcurproc = NULL;
+       return 0;
+}
+
+/*
+ * vmm_fpusave
+ *
+ * Save the guest's FPU state.  Must be called with interrupts disabled.
+ */
+void
+vmm_fpusave(struct vcpu *vcpu)
+{
+       if (xsave_mask) {
+               /* Save guest %xcr0 */
+               vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
+
+               /* Restore host %xcr0 */
+               xsetbv(0, xsave_mask);
+       }
+
+       /*
+        * Save full copy of FPU state - guest content is always
+        * a subset of host's save area (see xsetbv exit handler)
+        */
+       fpusavereset(&vcpu->vc_g_fpu);
+       vcpu->vc_fpuinited = 1;
}

/*
@@ -3839,39 +3867,10 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v

               /* Disable interrupts and save the current FPU state. */
               disable_intr();
-               clts();
-               vmm_fpusave();
-
-               /* Initialize the guest FPU if not inited already */
-               if (!vcpu->vc_fpuinited) {
-                       fninit();
-                       bzero(&vcpu->vc_g_fpu.fp_fxsave,
-                           sizeof(vcpu->vc_g_fpu.fp_fxsave));
-                       vcpu->vc_g_fpu.fp_fxsave.fx_fcw =
-                           __INITIAL_NPXCW__;
-                       vcpu->vc_g_fpu.fp_fxsave.fx_mxcsr =
-                           __INITIAL_MXCSR__;
-                       fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
-
-                       vcpu->vc_fpuinited = 1;
-               }
-
-               if (xsave_mask) {
-                       /* Restore guest XCR0 and FPU context */
-                       if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
-                               DPRINTF("%s: guest attempted to set invalid "
-                                   "bits in xcr0\n", __func__);
-                               ret = EINVAL;
-                               stts();
-                               enable_intr();
-                               break;
-                       }
-
-                       /* Restore guest %xcr0 */
-                       xrstor(&vcpu->vc_g_fpu, xsave_mask);
-                       xsetbv(0, vcpu->vc_gueststate.vg_xcr0);
-               } else
-                       fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+               if ((ret = vmm_fpurestore(vcpu))) {
+                       enable_intr();
+                       break;
+               }

               KERNEL_UNLOCK();
               ret = vmx_enter_guest(&vcpu->vc_control_pa,
@@ -3882,27 +3881,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
                * the guest FPU state still possibly on the CPU. Save the FPU
                * state before re-enabling interrupts.
                */
-               if (xsave_mask) {
-                       /* Save guest %xcr0 */
-                       vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
-
-                       /* Restore host %xcr0 */
-                       xsetbv(0, xsave_mask);
-
-                       /*
-                        * Save full copy of FPU state - guest content is
-                        * always a subset of host's save area (see xsetbv
-                        * exit handler)
-                        */
-                       xsave(&vcpu->vc_g_fpu, xsave_mask);
-               } else
-                       fxsave(&vcpu->vc_g_fpu);
-
-               /*
-                * FPU state is invalid, set CR0_TS to force DNA trap on next
-                * access.
-                */
-               stts();
+               vmm_fpusave(vcpu);

               enable_intr();

@@ -5715,39 +5694,10 @@ vcpu_run_svm(struct vcpu *vcpu, struct v

               /* Disable interrupts and save the current FPU state. */
               disable_intr();
-               clts();
-               vmm_fpusave();
-
-               /* Initialize the guest FPU if not inited already */
-               if (!vcpu->vc_fpuinited) {
-                       fninit();
-                       bzero(&vcpu->vc_g_fpu.fp_fxsave,
-                           sizeof(vcpu->vc_g_fpu.fp_fxsave));
-                       vcpu->vc_g_fpu.fp_fxsave.fx_fcw =
-                           __INITIAL_NPXCW__;
-                       vcpu->vc_g_fpu.fp_fxsave.fx_mxcsr =
-                           __INITIAL_MXCSR__;
-                       fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
-
-                       vcpu->vc_fpuinited = 1;
-               }
-
-               if (xsave_mask) {
-                       /* Restore guest XCR0 and FPU context */
-                       if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
-                               DPRINTF("%s: guest attempted to set invalid "
-                                   "bits in xcr0\n", __func__);
-                               ret = EINVAL;
-                               stts();
-                               enable_intr();
-                               break;
-                       }
-
-                       /* Restore guest %xcr0 */
-                       xrstor(&vcpu->vc_g_fpu, xsave_mask);
-                       xsetbv(0, vcpu->vc_gueststate.vg_xcr0);
-               } else
-                       fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+               if ((ret = vmm_fpurestore(vcpu))) {
+                       enable_intr();
+                       break;
+               }

               KERNEL_UNLOCK();

@@ -5761,27 +5711,7 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
                * the guest FPU state still possibly on the CPU. Save the FPU
                * state before re-enabling interrupts.
                */
-               if (xsave_mask) {
-                       /* Save guest %xcr0 */
-                       vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
-
-                       /* Restore host %xcr0 */
-                       xsetbv(0, xsave_mask);
-
-                       /*
-                        * Save full copy of FPU state - guest content is
-                        * always a subset of host's save area (see xsetbv
-                        * exit handler)
-                        */
-                       xsave(&vcpu->vc_g_fpu, xsave_mask);
-               } else
-                       fxsave(&vcpu->vc_g_fpu);
-
-               /*
-                * FPU state is invalid, set CR0_TS to force DNA trap on next
-                * access.
-                */
-               stts();
+               vmm_fpusave(vcpu);

               enable_intr();

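The %xcr0 subset check in vmm_fpurestore() is easiest to see with
concrete bits; a worked example with illustrative mask values:

        /* Suppose host xsave_mask = x87|SSE|AVX = 0x7.  A guest that
         * wrote 0x1f to %xcr0 has (0x1f & ~0x7) == 0x18, i.e. features
         * the host never saves, so the run aborts with EINVAL before
         * any xrstor of guest-controlled state is attempted. */
        if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask)
                return EINVAL;
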
Index: sys/arch/amd64/include/codepatch.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/codepatch.h,v
retrieving revision 1.4
diff -u -p -r1.4 codepatch.h
--- sys/arch/amd64/include/codepatch.h  25 Aug 2017 19:28:48 -0000      1.4
+++ sys/arch/amd64/include/codepatch.h  21 Jun 2018 11:54:01 -0000
@@ -50,6 +50,8 @@ void codepatch_call(uint16_t tag, void *
#define CPTAG_STAC             1
#define CPTAG_CLAC             2
#define CPTAG_EOI              3
+#define CPTAG_XRSTOR           4
+#define CPTAG_XSAVE            5

/*
 * As stac/clac SMAP instructions are 3 bytes, we want the fastest
Index: sys/arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.114.4.1
diff -u -p -r1.114.4.1 cpu.h
--- sys/arch/amd64/include/cpu.h        26 Feb 2018 12:29:48 -0000      1.114.4.1
+++ sys/arch/amd64/include/cpu.h        21 Jun 2018 11:54:01 -0000
@@ -115,10 +115,6 @@ struct cpu_info {
       u_int64_t ci_intr_rsp;  /* U<-->K trampoline stack */
       u_int64_t ci_user_cr3;  /* U-K page table */

-       struct proc *ci_fpcurproc;
-       struct proc *ci_fpsaveproc;
-       int ci_fpsaving;
-
       struct pcb *ci_curpcb;
       struct pcb *ci_idle_pcb;

@@ -216,9 +212,9 @@ struct cpu_info {
#define CPUF_IDENTIFIED        0x0020          /* CPU has been identified */

#define CPUF_CONST_TSC 0x0040          /* CPU has constant TSC */
-#define CPUF_USERSEGS_BIT      7       /* CPU has curproc's segments */
-#define CPUF_USERSEGS  (1<<CPUF_USERSEGS_BIT)          /* and FS.base */
+#define CPUF_USERSEGS  0x0080          /* CPU has curproc's segs and FS.base */
#define CPUF_INVAR_TSC 0x0100          /* CPU has invariant TSC */
+#define CPUF_USERXSTATE        0x0200          /* CPU has curproc's xsave state */

#define CPUF_PRESENT   0x1000          /* CPU is present */
#define CPUF_RUNNING   0x2000          /* CPU is running */
@@ -268,7 +264,6 @@ extern void need_resched(struct cpu_info
extern struct cpu_info *cpu_info[MAXCPUS];

void cpu_boot_secondary_processors(void);
-void cpu_init_idle_pcbs(void);

void cpu_kick(struct cpu_info *);
void cpu_unidle(struct cpu_info *);
@@ -371,7 +366,6 @@ void        dumpconf(void);
void   cpu_reset(void);
void   x86_64_proc0_tss_ldt_init(void);
void   x86_64_bufinit(void);
-void   x86_64_init_pcb_tss_ldt(struct cpu_info *);
void   cpu_proc_fork(struct proc *, struct proc *);
int    amd64_pa_used(paddr_t);
extern void (*cpu_idle_enter_fcn)(void);
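
CPUF_USERXSTATE inverts the old bookkeeping: instead of each pcb
recording which CPU holds its FPU state (pcb_fpcpu), each CPU records
whether its registers currently hold curproc's xsave state.  Anything
about to use the FPU for another purpose clears the flag and saves
first, as vmm_fpurestore() above does:

        if (ci->ci_flags & CPUF_USERXSTATE) {
                ci->ci_flags &= ~CPUF_USERXSTATE;
                fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
        }
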
Index: sys/arch/amd64/include/fpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/fpu.h,v
retrieving revision 1.12
diff -u -p -r1.12 fpu.h
--- sys/arch/amd64/include/fpu.h        27 Apr 2017 06:16:39 -0000      1.12
+++ sys/arch/amd64/include/fpu.h        21 Jun 2018 11:54:01 -0000
@@ -7,10 +7,11 @@
#include <sys/types.h>

/*
- * amd64 only uses the extended save/restore format used
- * by fxsave/fsrestore, to always deal with the SSE registers,
- * which are part of the ABI to pass floating point values.
- * Must be stored in memory on a 16-byte boundary.
+ * If the CPU supports xsave/xrstor then we use them so that we can provide
+ * AVX support.  Otherwise we require fxsave/fxrstor, as the SSE registers
+ * are part of the ABI for passing floating point values.
+ * While fxsave/fxrstor require only 16-byte alignment for the save area,
+ * xsave/xrstor require the save area to have 64-byte alignment.
 */

struct fxsave64 {
@@ -63,23 +64,22 @@ extern uint32_t     fpu_mxcsr_mask;
extern uint64_t        xsave_mask;

void fpuinit(struct cpu_info *);
-void fpudrop(void);
-void fpudiscard(struct proc *);
void fputrap(struct trapframe *);
-void fpusave_proc(struct proc *, int);
-void fpusave_cpu(struct cpu_info *, int);
+void fpusave(struct savefpu *);
+void fpusavereset(struct savefpu *);
void fpu_kernel_enter(void);
void fpu_kernel_exit(void);

+int    xrstor_user(struct savefpu *_addr, uint64_t _mask);
+#define        fpureset() \
+       xrstor_user(&proc0.p_addr->u_pcb.pcb_savefpu, xsave_mask)
+
#define fninit()               __asm("fninit")
#define fwait()                        __asm("fwait")
-#define fnclex()               __asm("fnclex")
+/* should be fxsave64, but where we use this it doesn't matter */
#define fxsave(addr)           __asm("fxsave %0" : "=m" (*addr))
-#define fxrstor(addr)          __asm("fxrstor %0" : : "m" (*addr))
#define ldmxcsr(addr)          __asm("ldmxcsr %0" : : "m" (*addr))
#define fldcw(addr)            __asm("fldcw %0" : : "m" (*addr))
-#define clts()                 __asm("clts")
-#define stts()                 lcr0(rcr0() | CR0_TS)

static inline void
xsave(struct savefpu *addr, uint64_t mask)
@@ -88,18 +88,9 @@ xsave(struct savefpu *addr, uint64_t mas

       lo = mask;
       hi = mask >> 32;
+       /* should be xsave64, but where we use this it doesn't matter */
       __asm volatile("xsave %0" : "=m" (*addr) : "a" (lo), "d" (hi) :
           "memory");
-}
-
-static inline void
-xrstor(struct savefpu *addr, uint64_t mask)
-{
-       uint32_t lo, hi;
-
-       lo = mask;
-       hi = mask >> 32;
-       __asm volatile("xrstor %0" : : "m" (*addr), "a" (lo), "d" (hi));
}

#endif
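
Per the comment at the top of fpu.h, a save area handed to xsave/xrstor
must be 64-byte aligned rather than 16; a sketch of a conforming
declaration (the attribute spelling here is illustrative):

        struct savefpu guest_fpu __attribute__((aligned(64)));
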
Index: sys/arch/amd64/include/intrdefs.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/intrdefs.h,v
retrieving revision 1.16
diff -u -p -r1.16 intrdefs.h
--- sys/arch/amd64/include/intrdefs.h   22 Jun 2016 01:12:38 -0000      1.16
+++ sys/arch/amd64/include/intrdefs.h   21 Jun 2018 11:54:01 -0000
@@ -75,8 +75,6 @@

#define X86_IPI_HALT                   0x00000001
#define X86_IPI_NOP                    0x00000002
-#define X86_IPI_FLUSH_FPU              0x00000004
-#define X86_IPI_SYNCH_FPU              0x00000008
#define X86_IPI_TLB                    0x00000010
#define X86_IPI_MTRR                   0x00000020
#define X86_IPI_SETPERF                        0x00000040
@@ -84,10 +82,10 @@
#define X86_IPI_START_VMM              0x00000100
#define X86_IPI_STOP_VMM               0x00000200

#define X86_NIPI                       10

-#define X86_IPI_NAMES { "halt IPI", "nop IPI", "FPU flush IPI", \
-                        "FPU synch IPI", "TLB shootdown IPI", \
+#define X86_IPI_NAMES { "halt IPI", "nop IPI", NULL, \
+                        NULL, "TLB shootdown IPI", \
                        "MTRR update IPI", "setperf IPI", "ddb IPI", \
                        "VMM start IPI", "VMM stop IPI" }

Index: sys/arch/amd64/include/pcb.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/pcb.h,v
retrieving revision 1.16
diff -u -p -r1.16 pcb.h
--- sys/arch/amd64/include/pcb.h        26 Apr 2017 07:05:24 -0000      1.16
+++ sys/arch/amd64/include/pcb.h        21 Jun 2018 11:54:01 -0000
@@ -69,7 +69,6 @@

#include <sys/signal.h>

-#include <machine/tss.h>
#include <machine/fpu.h>

/*
@@ -84,9 +83,7 @@ struct pcb {
       u_int64_t       pcb_kstack;     /* kernel stack address */
       u_int64_t       pcb_fsbase;     /* per-thread offset: %fs */
       caddr_t pcb_onfault;            /* copyin/out fault recovery */
-       struct  cpu_info *pcb_fpcpu;    /* cpu holding our fp state. */
       struct  pmap *pcb_pmap;         /* back pointer to our pmap */
-       int     pcb_cr0;                /* saved image of CR0 */
};

#ifdef _KERNEL
Index: sys/arch/amd64/include/proc.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/proc.h,v
retrieving revision 1.9
diff -u -p -r1.9 proc.h
--- sys/arch/amd64/include/proc.h       13 Apr 2017 03:52:25 -0000      1.9
+++ sys/arch/amd64/include/proc.h       21 Jun 2018 11:54:01 -0000
@@ -46,7 +46,6 @@ struct mdproc {
};

/* md_flags */
-#define        MDP_USEDFPU     0x0001  /* has used the FPU */
#define MDP_IRET       0x0002  /* return via iret, not sysret */
                               /* (iret can restore r11 and rcx) */

Index: sys/arch/amd64/include/specialreg.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/specialreg.h,v
retrieving revision 1.61.4.1
diff -u -p -r1.61.4.1 specialreg.h
--- sys/arch/amd64/include/specialreg.h 26 Feb 2018 12:29:48 -0000      1.61.4.1
+++ sys/arch/amd64/include/specialreg.h 21 Jun 2018 11:54:01 -0000
@@ -1386,3 +1386,15 @@
#define PAT_WB          0x6UL
#define PAT_UCMINUS     0x7UL

+/*
+ * XSAVE subfeatures (cpuid 0xd, leaf 1)
+ */
+#define XSAVE_XSAVEOPT         0x1UL
+#define XSAVE_XSAVEC           0x2UL
+#define XSAVE_XGETBV1          0x4UL
+#define XSAVE_XSAVES           0x8UL
+
+/*
+ * Default cr0 flags.
+ */
+#define CR0_DEFAULT    (CR0_PE|CR0_PG|CR0_NE|CR0_WP)
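
CR0_DEFAULT deliberately omits CR0_TS: with clts()/stts() gone from
fpu.h and trap07 now "impossible", nothing ever sets the task-switched
bit, so the #DNA trap can no longer fire.  The composed value, worked
out from the standard CR0 bit definitions:

        /* CR0_PE (0x00000001) | CR0_PG (0x80000000) |
         * CR0_NE (0x00000020) | CR0_WP (0x00010000) == 0x80010021 */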