untrusted comment: signature from openbsd 6.2 base secret key
RWRVWzAMgtyg7h6Z/ES+ftCrC3y4jz05b9Q4N4uIZDqQEzb7lw6vB6BGumpp3us1ydI/8HGsYSlzPUl7ai/pMISPf6LswZDJZAI=
OpenBSD 6.2 errata 017, June 24, 2018:
Intel CPUs speculatively access FPU registers even when the FPU is disabled,
so data (including AES keys) from previous contexts could be discovered
when the lazy-save approach is used. Switch to the eager-save approach.
And then rebuild and install the kernel:
KK=`sysctl -n kern.osversion | cut -d# -f1`
cd /usr/src/sys/arch/`machine`/compile/$KK
make obj
make config
make
make install
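
Conceptually, the change replaces the lazy, DNA-trap-driven FPU save with an
unconditional save and reset at every context switch. The stand-alone C sketch
below only illustrates that difference; it is not code from the patch that
follows, and the struct and function names are hypothetical.

/*
 * Hypothetical userland model of lazy vs. eager FPU switching.
 * Build with: cc -Wall sketch.c
 */
#include <stdio.h>
#include <string.h>

struct fpu_regs {                       /* stand-in for the XMM/FPU register file */
	unsigned char xmm0[16];
};

struct pcb {                            /* stand-in for the per-process save area */
	struct fpu_regs pcb_savefpu;
};

static struct fpu_regs cpu_fpu;         /* registers currently live in the CPU */

/*
 * Lazy policy: leave the old contents in the CPU and rely on a later
 * DNA (#NM) trap to save/restore.  Until that trap fires, the previous
 * process' data sits in the register file, where speculative access
 * can observe it.
 */
static void
switch_lazy(struct pcb *oldp, struct pcb *newp)
{
	(void)oldp;
	(void)newp;
}

/*
 * Eager policy (this erratum): save the old state and reset the
 * register file on every switch, so nothing from the old context survives.
 */
static void
switch_eager(struct pcb *oldp, struct pcb *newp)
{
	oldp->pcb_savefpu = cpu_fpu;            /* fxsave/xsave */
	memset(&cpu_fpu, 0, sizeof(cpu_fpu));   /* load a clean state */
	(void)newp;     /* new proc's state is reloaded on return to userspace */
}

int
main(void)
{
	struct pcb a, b;

	memset(&a, 0, sizeof(a));
	memset(&b, 0, sizeof(b));

	memcpy(cpu_fpu.xmm0, "SECRET-AES-KEY!", 16);    /* process A's data */
	switch_lazy(&a, &b);
	printf("lazy : CPU still holds \"%s\"\n", (const char *)cpu_fpu.xmm0);

	memcpy(cpu_fpu.xmm0, "SECRET-AES-KEY!", 16);
	switch_eager(&a, &b);
	printf("eager: CPU holds \"%s\"\n", (const char *)cpu_fpu.xmm0);
	return 0;
}

With the eager policy nothing from the previous context remains in the register
file after a switch, which closes the speculative window the patch below addresses.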
/*
- * We do lazy initialization and switching using the TS bit in cr0 and the
- * MDP_USEDFPU bit in mdproc.
- *
- * DNA exceptions are handled like this:
- *
- * 1) If there is no FPU, return and go to the emulator.
- * 2) If someone else has used the FPU, save its state into that process' PCB.
- * 3a) If MDP_USEDFPU is not set, set it and initialize the FPU.
- * 3b) Otherwise, reload the process' previous FPU state.
- *
- * When a process is created or exec()s, its saved cr0 image has the TS bit
- * set and the MDP_USEDFPU bit clear. The MDP_USEDFPU bit is set when the
- * process first gets a DNA and the FPU is initialized. The TS bit is turned
- * off when the FPU is used, and turned on again later when the process' FPU
- * state is saved.
- */
-
-/*
* The mask of enabled XSAVE features.
*/
uint64_t xsave_mask;
-void fpudna(struct cpu_info *, struct trapframe *);
static int x86fpflags_to_siginfo(u_int32_t);
-#ifdef DIAGNOSTIC
- /*
- * At this point, fpcurproc should be curproc. If it wasn't,
- * the TS bit should be set, and we should have gotten a DNA exception.
- */
- if (p != curproc)
- panic("fputrap: wrong proc");
-#endif
+ KASSERT(ci->ci_flags & CPUF_USERXSTATE);
+ ci->ci_flags &= ~CPUF_USERXSTATE;
+ fpusavereset(sfp);
-/*
- * Implement device not available (DNA) exception
- *
- * If we were the last process to use the FPU, we can simply return.
- * Otherwise, we save the previous state, if necessary, and restore our last
- * saved state.
- */
-void
-fpudna(struct cpu_info *ci, struct trapframe *frame)
-{
- struct savefpu *sfp;
- struct proc *p;
- int s;
-
- if (ci->ci_fpsaving) {
- printf("recursive fpu trap; cr0=%x\n", rcr0());
- return;
- }
-
- s = splipi();
-
-#ifdef MULTIPROCESSOR
- p = ci->ci_curproc;
-#else
- p = curproc;
-#endif
-
- /*
- * Initialize the FPU state to clear any exceptions. If someone else
- * was using the FPU, save their state.
- */
- if (ci->ci_fpcurproc != NULL && ci->ci_fpcurproc != p) {
- fpusave_cpu(ci, ci->ci_fpcurproc != &proc0);
- uvmexp.fpswtch++;
- }
- splx(s);
-
- if (p == NULL) {
- clts();
- return;
- }
-
- KDASSERT(ci->ci_fpcurproc == NULL);
-#ifndef MULTIPROCESSOR
- KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
-#else
- if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
- fpusave_proc(p, 1);
-#endif
-
- p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
- clts();
-
- s = splipi();
- ci->ci_fpcurproc = p;
- p->p_addr->u_pcb.pcb_fpcpu = ci;
- splx(s);
-
- sfp = &p->p_addr->u_pcb.pcb_savefpu;
-
- if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
- fninit();
- bzero(&sfp->fp_fxsave, sizeof(sfp->fp_fxsave));
- sfp->fp_fxsave.fx_fcw = __INITIAL_NPXCW__;
- sfp->fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
- fxrstor(&sfp->fp_fxsave);
- p->p_md.md_flags |= MDP_USEDFPU;
- } else {
- if (xsave_mask) {
- if (xrstor_user(sfp, xsave_mask)) {
- fpusave_proc(p, 0); /* faulted */
- frame->tf_trapno = T_PROTFLT;
- trap(frame);
- return;
- }
- } else {
- static double zero = 0.0;
-
- /*
- * amd fpu does not restore fip, fdp, fop on fxrstor
- * thus leaking other process's execution history.
- */
- fnclex();
- __asm volatile("ffree %%st(7)\n\tfldl %0" : : "m" (zero));
- fxrstor(sfp);
- }
- }
-}
-
-
-void
-fpusave_cpu(struct cpu_info *ci, int save)
-{
- struct proc *p;
- int s;
-
- KDASSERT(ci == curcpu());
-
- p = ci->ci_fpcurproc;
- if (p == NULL)
- return;
-
- if (save) {
-#ifdef DIAGNOSTIC
- if (ci->ci_fpsaving != 0)
- panic("fpusave_cpu: recursive save!");
-#endif
- /*
- * Set ci->ci_fpsaving, so that any pending exception will be
- * thrown away. (It will be caught again if/when the FPU
- * state is restored.)
- */
- clts();
- ci->ci_fpsaving = 1;
- if (xsave_mask)
- xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask);
- else
- fxsave(&p->p_addr->u_pcb.pcb_savefpu);
- ci->ci_fpsaving = 0;
- }
-
- stts();
- p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
-
- s = splipi();
- p->p_addr->u_pcb.pcb_fpcpu = NULL;
- ci->ci_fpcurproc = NULL;
- splx(s);
-}
-
-/*
- * Save p's FPU state, which may be on this processor or another processor.
- */
-void
-fpusave_proc(struct proc *p, int save)
-{
- struct cpu_info *ci = curcpu();
- struct cpu_info *oci;
-
- KDASSERT(p->p_addr != NULL);
-
- oci = p->p_addr->u_pcb.pcb_fpcpu;
- if (oci == NULL)
- return;
-
-#if defined(MULTIPROCESSOR)
- if (oci == ci) {
- int s = splipi();
- fpusave_cpu(ci, save);
- splx(s);
- } else {
- oci->ci_fpsaveproc = p;
- x86_send_ipi(oci,
- save ? X86_IPI_SYNCH_FPU : X86_IPI_FLUSH_FPU);
- while (p->p_addr->u_pcb.pcb_fpcpu != NULL)
- CPU_BUSY_CYCLE();
- }
-#else
- KASSERT(ci->ci_fpcurproc == p);
- fpusave_cpu(ci, save);
-#endif
-}
-
void
fpu_kernel_enter(void)
{
- struct cpu_info *ci = curcpu();
- uint32_t cw;
- int s;
-
- /*
- * Fast path. If the kernel was using the FPU before, there
- * is no work to do besides clearing TS.
- */
- if (ci->ci_fpcurproc == &proc0) {
- clts();
- return;
- }
-
- s = splipi();
+ struct cpu_info *ci = curcpu();
- if (ci->ci_fpcurproc != NULL) {
- fpusave_cpu(ci, 1);
- uvmexp.fpswtch++;
+ /* save curproc's FPU state if we haven't already */
+ if (ci->ci_flags & CPUF_USERXSTATE) {
+ ci->ci_flags &= ~CPUF_USERXSTATE;
+ fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
}
-
- /* Claim the FPU */
- ci->ci_fpcurproc = &proc0;
-
- splx(s);
-
- /* Disable DNA exceptions */
- clts();
-
- /* Initialize the FPU */
- fninit();
- cw = __INITIAL_NPXCW__;
- fldcw(&cw);
- cw = __INITIAL_MXCSR__;
- ldmxcsr(&cw);
}
void
fpu_kernel_exit(void)
{
- /* Enable DNA exceptions */
- stts();
+ /* make sure we don't leave anything in the registers */
+ fpureset();
}
Index: sys/arch/amd64/amd64/genassym.cf
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/genassym.cf,v
retrieving revision 1.31.8.1
diff -u -p -r1.31.8.1 genassym.cf
--- sys/arch/amd64/amd64/genassym.cf 26 Feb 2018 12:29:48 -0000 1.31.8.1
+++ sys/arch/amd64/amd64/genassym.cf 21 Jun 2018 11:54:01 -0000
@@ -94,9 +94,8 @@ member pcb_rbp
member pcb_kstack
member pcb_fsbase
member pcb_onfault
-member pcb_fpcpu
member pcb_pmap
-member pcb_cr0
+member pcb_savefpu
struct pmap
member pm_cpus
@@ -131,7 +130,8 @@ member CPU_INFO_USER_CR3 ci_user_cr3
member CPU_INFO_KERN_RSP ci_kern_rsp
member CPU_INFO_INTR_RSP ci_intr_rsp
- movl CPUVAR(CPUID),%edi
+ movl CPUVAR(CPUID),%r9d
+
+ /* for the FPU/"extended CPU state" handling below */
+ movq xsave_mask(%rip),%rdx
+ movl %edx,%eax
+ shrq $32,%rdx
/* If old proc exited, don't bother. */
testq %r13,%r13
@@ -358,7 +364,7 @@ ENTRY(cpu_switchto)
* %rax, %rcx - scratch
* %r13 - old proc, then old pcb
* %r12 - new proc
- * %edi - cpuid
+ * %r9d - cpuid
*/
movq P_ADDR(%r13),%r13
@@ -366,16 +372,46 @@ ENTRY(cpu_switchto)
/* clear the old pmap's bit for the cpu */
movq PCB_PMAP(%r13),%rcx
lock
- btrq %rdi,PM_CPUS(%rcx)
+ btrq %r9,PM_CPUS(%rcx)
/* Save stack pointers. */
movq %rsp,PCB_RSP(%r13)
movq %rbp,PCB_RBP(%r13)
+ /*
+ * If the old proc ran in userspace then save the
+ * floating-point/"extended state" registers
+ */
+ testl $CPUF_USERXSTATE,CPUVAR(FLAGS)
+ jz .Lxstate_reset
+
+ movq %r13, %rdi
+#if PCB_SAVEFPU != 0
+ addq $PCB_SAVEFPU,%rdi
+#endif
+ CODEPATCH_START
+ .byte 0x48; fxsave (%rdi) /* really fxsave64 */
+ CODEPATCH_END(CPTAG_XSAVE)
+
switch_exited:
- /* did old proc run in userspace? then reset the segment regs */
- btrl $CPUF_USERSEGS_BIT, CPUVAR(FLAGS)
- jnc restore_saved
+ /* now clear the xstate */
+ movq proc0paddr(%rip),%rdi
+#if PCB_SAVEFPU != 0
+ addq $PCB_SAVEFPU,%rdi
+#endif
+ CODEPATCH_START
+ .byte 0x48; fxrstor (%rdi) /* really fxrstor64 */
+ CODEPATCH_END(CPTAG_XRSTOR)
+ andl $~CPUF_USERXSTATE,CPUVAR(FLAGS)
+
+.Lxstate_reset:
+ /*
+ * If the segment registers haven't been reset since the old proc
+ * ran in userspace then reset them now
+ */
+ testl $CPUF_USERSEGS,CPUVAR(FLAGS)
+ jz restore_saved
+ andl $~CPUF_USERSEGS,CPUVAR(FLAGS)
/* set %ds, %es, %fs, and %gs to expected value to prevent info leak */
movw $(GSEL(GUDATA_SEL, SEL_UPL)),%ax
@@ -432,32 +468,17 @@ restore_saved:
0:
/* set the new pmap's bit for the cpu */
- movl CPUVAR(CPUID),%edi
lock
- btsq %rdi,PM_CPUS(%rcx)
+ btsq %r9,PM_CPUS(%rcx)
#ifdef DIAGNOSTIC
jc _C_LABEL(switch_pmcpu_set)
#endif
@@ -529,7 +550,7 @@ IDTVEC(syscall)
* %rip and the original rflags has been copied to %r11. %cs and
* %ss have been updated to the kernel segments, but %rsp is still
* the user-space value.
- * First order of business is to swap to the kernel gs.base so that
+ * First order of business is to swap to the kernel GS.base so that
* we can access our struct cpu_info and use the scratch space there
* to switch to the kernel page tables (thank you, Intel), then
* switch to our kernel stack. Once that's in place we can
@@ -563,7 +584,7 @@ NENTRY(Xsyscall_untramp)
movq %r11, TF_RFLAGS(%rsp) /* old rflags from syscall insn */
movq $(GSEL(GUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
movq %rcx,TF_RIP(%rsp)
- movq $2,TF_ERR(%rsp) /* ignored */
+ movq %rax,TF_ERR(%rsp) /* stash syscall # for SPL check */
movq CPUVAR(CURPROC),%r14
movq %rsp,P_MD_REGS(%r14) # save pointer to frame
@@ -590,8 +611,17 @@ NENTRY(Xsyscall_untramp)
/* Could registers have been changed that require an iretq? */
testl $MDP_IRET, P_MD_FLAGS(%r14)
- jne intr_fast_exit
+ jne intr_user_exit_post_ast
+
+ /* Restore FPU/"extended CPU state" if it's not already in the CPU */
+ testl $CPUF_USERXSTATE,CPUVAR(FLAGS)
+ jz .Lsyscall_restore_xstate
+
+ /* Restore FS.base if it's not already in the CPU */
+ testl $CPUF_USERSEGS,CPUVAR(FLAGS)
+ jz .Lsyscall_restore_fsbase
- /* Restore FS.base if it's not already in the CPU */
- btsl $CPUF_USERSEGS_BIT,CPUVAR(FLAGS)
- jc 99f
- movq CPUVAR(CURPCB),%rdx
- movq PCB_FSBASE(%rdx),%rax
- movq %rax,%rdx
- shrq $32,%rdx
- movl $MSR_FSBASE,%ecx
- wrmsr
-99:
-
/*
* We need to finish reading from the trapframe, then switch
* to the user page tables, swapgs, and return. We need
@@ -642,11 +661,42 @@ KUENTRY(syscall_trampback)
sysretq
/*
- * Return via iretq, for real interrupts and signal returns
+ * Returning to userspace via iretq. We do things in this order:
+ * - check for ASTs
+ * - restore FPU/"extended CPU state" if it's not already in the CPU
+ * - DIAGNOSTIC: no more C calls after this, so check the SPL
+ * - restore FS.base if it's not already in the CPU
+ * - restore most registers
+ * - update the iret frame from the trapframe
+ * - finish reading from the trapframe
+ * - switch to the trampoline stack \
+ * - jump to the .kutext segment |-- Meltdown workaround
+ * - switch to the user page tables /
+ * - swapgs
+ * - iretq
*/
-NENTRY(intr_fast_exit)
+NENTRY(intr_user_exit)
#ifdef DIAGNOSTIC
pushfq
popq %rdx
testq $PSL_I,%rdx
- jnz .Lintr_exit_not_blocked
+ jnz .Lintr_user_exit_not_blocked
+#endif /* DIAGNOSTIC */
+
+ /* Check for ASTs */
+ CHECK_ASTPENDING(%r11)
+ je intr_user_exit_post_ast
+ CLEAR_ASTPENDING(%r11)
+ sti
+ movq %rsp,%rdi
+ call _C_LABEL(ast)
+ cli
+ jmp intr_user_exit
+
+intr_user_exit_post_ast:
+ /* Restore FPU/"extended CPU state" if it's not already in the CPU */
+ testl $CPUF_USERXSTATE,CPUVAR(FLAGS)
+ jz .Lintr_restore_xstate
+
+#ifdef DIAGNOSTIC
+ /* no more C calls after this, so check the SPL */
+ cmpl $0,CPUVAR(ILEVEL)
+ jne .Luser_spl_not_lowered
#endif /* DIAGNOSTIC */
+
+ /* Restore FS.base if it's not already in the CPU */
+ testl $CPUF_USERSEGS,CPUVAR(FLAGS)
+ jz .Lintr_restore_fsbase
+
+.Lintr_restore_registers:
movq TF_RDI(%rsp),%rdi
movq TF_RSI(%rsp),%rsi
movq TF_R8(%rsp),%r8
@@ -697,30 +786,7 @@ NENTRY(intr_fast_exit)
movq TF_RBP(%rsp),%rbp
movq TF_RBX(%rsp),%rbx
- testq $SEL_RPL,TF_CS(%rsp)
- je intr_exit_recurse /* returning back to kernel? */
-
- /* returning to userspace. XXX fix up iret frame here */
-
- /* restore FS.base if it's not already in the CPU */
- btsl $CPUF_USERSEGS_BIT,CPUVAR(FLAGS)
- jc 99f
- movq CPUVAR(CURPCB),%rdx /* for below */
- movq PCB_FSBASE(%rdx),%rax
- movq %rax,%rdx
- shrq $32,%rdx
- movl $MSR_FSBASE,%ecx
- wrmsr
-99:
/*
- * Returning to userspace. We need to go things in this order:
- * - update the iret frame from the trapframe
- * - finish reading from the trapframe
- * - switch to the trampoline stack
- * - jump to the .kutext segment
- * - switch to the user page tables
- * - swapgs
- * - iretq
* To get the final value for the register that was used
* for the mov to %cr3, we need access to somewhere accessible
* on the user page tables, so we save it in CPUVAR(SCRATCH)
@@ -758,7 +824,101 @@ KUENTRY(iretq_tramp)
_C_LABEL(doreti_iret):
iretq
-/*
- * Set up TSS for a new PCB.
- */
-
-#ifdef MULTIPROCESSOR
-void
-x86_64_init_pcb_tss_ldt(struct cpu_info *ci)
-{
- struct pcb *pcb = ci->ci_idle_pcb;
-
- pcb->pcb_cr0 = rcr0();
-}
-#endif /* MULTIPROCESSOR */
-
bios_diskinfo_t *
bios_getdiskinfo(dev_t dev)
{
@@ -579,6 +564,7 @@ sendsig(sig_t catcher, int sig, int mask
struct trapframe *tf = p->p_md.md_regs;
struct sigacts *psp = p->p_p->ps_sigacts;
struct sigcontext ksc;
+ struct savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu;
siginfo_t ksi;
register_t sp, scp, sip;
u_long sss;
@@ -597,17 +583,19 @@ sendsig(sig_t catcher, int sig, int mask
sp &= ~15ULL; /* just in case */
sss = (sizeof(ksc) + 15) & ~15;
- if (p->p_md.md_flags & MDP_USEDFPU) {
- fpusave_proc(p, 1);
- sp -= fpu_save_len;
- ksc.sc_fpstate = (struct fxsave64 *)sp;
- if (copyout(&p->p_addr->u_pcb.pcb_savefpu.fp_fxsave,
- (void *)sp, fpu_save_len))
- sigexit(p, SIGILL);
+ /* Save FPU state to PCB if necessary, then copy it out */
+ if (curcpu()->ci_flags & CPUF_USERXSTATE) {
+ curcpu()->ci_flags &= ~CPUF_USERXSTATE;
+ fpusavereset(&p->p_addr->u_pcb.pcb_savefpu);
+ }
+ sp -= fpu_save_len;
+ ksc.sc_fpstate = (struct fxsave64 *)sp;
+ if (copyout(sfp, (void *)sp, fpu_save_len))
+ sigexit(p, SIGILL);
- /* Signal handlers get a completely clean FP state */
- p->p_md.md_flags &= ~MDP_USEDFPU;
- }
+ /* Now reset the FPU state in PCB */
+ memcpy(&p->p_addr->u_pcb.pcb_savefpu,
+ &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len);
sip = 0;
if (psp->ps_siginfo & sigmask(sig)) {
@@ -637,6 +625,9 @@ sendsig(sig_t catcher, int sig, int mask
tf->tf_rflags &= ~(PSL_T|PSL_D|PSL_VM|PSL_AC);
tf->tf_rsp = scp;
tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
+
+ /* The reset state _is_ the userspace state for this thread now */
+ curcpu()->ci_flags |= CPUF_USERXSTATE;
}
- if (p->p_md.md_flags & MDP_USEDFPU)
- fpusave_proc(p, 0);
+ /* Current state is obsolete; toss it and force a reload */
+ if (curcpu()->ci_flags & CPUF_USERXSTATE) {
+ curcpu()->ci_flags &= ~CPUF_USERXSTATE;
+ fpureset();
+ }
- if (ksc.sc_fpstate) {
+ /* Copy in the FPU state to restore */
+ if (__predict_true(ksc.sc_fpstate != NULL)) {
struct fxsave64 *fx = &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave;
ksc.sc_trapno = tf->tf_trapno;
@@ -707,6 +705,7 @@ sys_sigreturn(struct proc *p, void *v, r
* when a signal was being delivered, the process will be
* completely restored, including the userland %rcx and %r11
* registers which the 'sysretq' instruction cannot restore.
+ * Also need to make sure we can handle faulting on xrstor.
*/
p->p_md.md_flags |= MDP_IRET;
- /* If we were using the FPU, forget about it. */
- if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
- fpusave_proc(p, 0);
- p->p_md.md_flags &= ~MDP_USEDFPU;
+ /* Reset FPU state in PCB */
+ memcpy(&p->p_addr->u_pcb.pcb_savefpu,
+ &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len);
+
+ if (curcpu()->ci_flags & CPUF_USERXSTATE) {
+ /* state in CPU is obsolete; reset it */
+ fpureset();
+ } else {
+ /* the reset state _is_ the userspace state now */
+ curcpu()->ci_flags |= CPUF_USERXSTATE;
+ }
+
+ /* To reset all registers we have to return via iretq */
p->p_md.md_flags |= MDP_IRET;
- /*
- * If iretq faults, we'll get a trap at doreti_iret with CPL==0 but
- * the user's GS.base, which INTRENTRY wouldn't handle correctly
- * (it would skip the swapgs), so locally expand both it and
- * INTR_SAVE_GPRS, but add an extra test comparing %rip to doreti_iret
- * so that we can do the necessary swapgs in that case.
- */
+/*
+ * The #GP (general protection fault) handler has a couple weird cases
+ * to handle:
+ * - trapping in iretq to userspace and
+ * - trapping in xrstor in the kernel.
+ * We detect both of these by examining the %rip in the iretq_frame.
+ * Handling them is done by updating %rip in the iretq_frame to point
+ * to a stub handler of some sort and then iretq'ing to it. For the
+ * iretq fault we resume in a stub which acts like we got a fresh #GP.
+ * For the xrstor fault we resume to a stub which returns an error to
+ * the routine that requested the xrstor.
+ */
IDTVEC(trap0d)
+ pushq %rdx
pushq %rcx
- leaq _C_LABEL(doreti_iret)(%rip),%rcx
- cmpq %rcx,16(%rsp) /* over %rcx and err to %rip */
+ movq 24(%rsp),%rdx /* over %r[cd]x and err to %rip */
+ leaq doreti_iret(%rip),%rcx
+ cmpq %rcx,%rdx
+ je .Lhandle_doreti
+ leaq xrstor_fault(%rip),%rcx
+ cmpq %rcx,%rdx
+ je .Lhandle_xrstor
popq %rcx
- je 1f
- testq $SEL_RPL,16(%rsp) /* over err and %rip to %cs */
- je INTRENTRY_LABEL(trap0d)
-1: swapgs
- movq %rax,CPUVAR(SCRATCH)
- movq CPUVAR(KERN_CR3),%rax
- testq %rax,%rax
- jz 98f
- movq %rax,%cr3
- jmp 98f
- .text
- .globl INTRENTRY_LABEL(trap0d)
-INTRENTRY_LABEL(trap0d): /* from kernel */
- pushq $T_PROTFLT
- subq $152,%rsp
- movq %rcx,TF_RCX(%rsp)
- jmp 99f
-98: /* from userspace */
- movq CPUVAR(KERN_RSP),%rax
- xchgq %rax,%rsp
- movq %rcx,TF_RCX(%rsp)
- /* set trapno in the trap frame */
- movq $T_PROTFLT,TF_TRAPNO(%rsp)
- /* copy err and iretq frame to the trap frame */
- movq 0(%rax),%rcx
- movq %rcx,TF_ERR(%rsp)
- add $8,%rax
- movq IRETQ_RIP(%rax),%rcx
- movq %rcx,TF_RIP(%rsp)
- movq IRETQ_CS(%rax),%rcx
- movq %rcx,TF_CS(%rsp)
- movq IRETQ_RFLAGS(%rax),%rcx
- movq %rcx,TF_RFLAGS(%rsp)
- movq IRETQ_RSP(%rax),%rcx
- movq %rcx,TF_RSP(%rsp)
- movq IRETQ_SS(%rax),%rcx
- movq %rcx,TF_SS(%rsp)
- movq CPUVAR(SCRATCH),%rax
-99: INTR_SAVE_MOST_GPRS_NO_ADJ
- sti
- jmp calltrap
+ popq %rdx
+ TRAP(T_PROTFLT)
+
+.Lhandle_xrstor:
+ /* xrstor faulted; just resume in xrstor_resume */
+ leaq xrstor_resume(%rip),%rcx
+ jmp 1f
+
+.Lhandle_doreti:
+ /* iretq faulted; resume in a stub that acts like we got a #GP */
+ leaq .Lhandle_doreti_resume(%rip),%rcx
+1: movq %rcx,24(%rsp) /* over %r[cd]x and err to %rip */
+ popq %rcx
+ popq %rdx
+ addq $8,%rsp /* pop the err code */
+ jmp doreti_iret
+.Lhandle_doreti_resume:
+ ZTRAP(T_PROTFLT)
/*
- * If an error is detected during trap, syscall, or interrupt exit, trap() will
- * change %rip to point to this label. At that point, we'll be running with
- * the kernel GS.base, but the trap frame will be from CPL==3, so we can't
- * go through INTRENTRY as it would do the swapgs that we don't want/need.
- * So, locally expand INTRENTRY but without the swapgs: manually
- * clean up the stack and resume as if we were handling a general
- * protection fault. This will cause the process to get a SIGBUS.
- */
-NENTRY(resume_iret)
- movq %rax,CPUVAR(SCRATCH)
- movq CPUVAR(KERN_CR3),%rax
- testq %rax,%rax
- jz INTRENTRY_LABEL(iret)
- movq %rax,%cr3
- jmp INTRENTRY_LABEL(iret)
- .text
- .globl INTRENTRY_LABEL(iret)
-INTRENTRY_LABEL(iret): /* from kernel */
- movq CPUVAR(KERN_RSP),%rax
- xchgq %rax,%rsp
- movq %rcx,TF_RCX(%rsp)
- /* set trapno+err in the trap frame */
- movq $T_PROTFLT,TF_TRAPNO(%rsp)
- movq $0,TF_ERR(%rsp)
- /* copy iretq frame to the trap frame */
- movq IRETQ_RIP(%rax),%rcx
- movq %rcx,TF_RIP(%rsp)
- movq IRETQ_CS(%rax),%rcx
- movq %rcx,TF_CS(%rsp)
- movq IRETQ_RFLAGS(%rax),%rcx
- movq %rcx,TF_RFLAGS(%rsp)
- movq IRETQ_RSP(%rax),%rcx
- movq %rcx,TF_RSP(%rsp)
- movq IRETQ_SS(%rax),%rcx
- movq %rcx,TF_SS(%rsp)
- movq CPUVAR(SCRATCH),%rax
- INTR_SAVE_MOST_GPRS_NO_ADJ
- sti
- jmp calltrap
-
-
-/*
* All traps go through here. Call the generic trap handler, and
* check for ASTs afterwards.
*/
KUENTRY(alltraps)
INTRENTRY(alltraps)
sti
-calltrap:
cld
SMAP_CLAC
#ifdef DIAGNOSTIC
@@ -376,19 +311,14 @@ calltrap:
jz 2f
.Lreal_trap:
#endif /* !defined(GPROF) && defined(DDBPROF) */
+ .globl recall_trap
+recall_trap:
movq %rsp, %rdi
call _C_LABEL(trap)
2: /* Check for ASTs on exit to user mode. */
cli
- CHECK_ASTPENDING(%r11)
- je 1f
testb $SEL_RPL,TF_CS(%rsp)
- jz 1f
-5: CLEAR_ASTPENDING(%r11)
- sti
- movq %rsp, %rdi
- call _C_LABEL(ast)
- jmp 2b
+ jnz intr_user_exit
#ifndef DIAGNOSTIC
1: INTRFASTEXIT
#else /* DIAGNOSTIC */
@@ -396,7 +326,7 @@ calltrap:
jne 3f
INTRFASTEXIT
3: sti
- movabsq $spl_lowered,%rdi
+ leaq spl_lowered(%rip),%rdi
movl CPUVAR(ILEVEL),%esi
movl %ebx,%edx
xorq %rax,%rax
@@ -601,7 +531,6 @@ KIDTVEC(resume_xen_upcall)
2:
movq $(1 << LIR_XEN),%rax
orq %rax,CPUVAR(IPENDING)
-3:
INTRFASTEXIT
#endif /* NXEN > 0 */
@@ -521,14 +514,8 @@ void
viac3_rnd(void *v)
{
struct timeout *tmo = v;
- unsigned int *p, i, rv, creg0, len = VIAC3_RNG_BUFSIZ;
+ unsigned int *p, i, rv, len = VIAC3_RNG_BUFSIZ;
static int buffer[VIAC3_RNG_BUFSIZ + 2]; /* XXX why + 2? */
-#ifdef MULTIPROCESSOR
- int s = splipi();
-#endif
-
- creg0 = rcr0(); /* Permit access to SIMD/FPU path */
- lcr0(creg0 & ~(CR0_EM|CR0_TS));
/*
* Here we collect the random data from the VIA C3 RNG. We make
@@ -538,12 +525,6 @@ viac3_rnd(void *v)
__asm volatile("rep xstorerng"
: "=a" (rv) : "d" (3), "D" (buffer), "c" (len*sizeof(int))
: "memory", "cc");
-
- lcr0(creg0);
-
-#ifdef MULTIPROCESSOR
- splx(s);
-#endif
- /*
- * If fpuproc != p1, then the fpu h/w state is irrelevant and the
- * state had better already be in the pcb. This is true for forks
- * but not for dumps.
- *
- * If fpuproc == p1, then we have to save the fpu h/w state to
- * p1's pcb so that we can copy it.
- */
- if (p1->p_addr->u_pcb.pcb_fpcpu != NULL)
- fpusave_proc(p1, 1);
+ /* Save the fpu h/w state to p1's pcb so that we can copy it. */
+ fpusave(&pcb1->pcb_savefpu);
/*
* Activate the address space.
@@ -137,11 +130,6 @@ cpu_fork(struct proc *p1, struct proc *p
void
cpu_exit(struct proc *p)
{
-
- /* If we were using the FPU, forget about it. */
- if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
- fpusave_proc(p, 0);
-
pmap_deactivate(p);
sched_exit(p);
}
Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.170
diff -u -p -r1.170 vmm.c
--- sys/arch/amd64/amd64/vmm.c 8 Sep 2017 05:36:51 -0000 1.170
+++ sys/arch/amd64/amd64/vmm.c 21 Jun 2018 11:54:01 -0000
@@ -3584,39 +3584,67 @@ vcpu_must_stop(struct vcpu *vcpu)
}
/*
- * vmm_fpusave
+ * vmm_fpurestore
*
- * Modified version of fpusave_cpu from fpu.c that only saves the FPU context
- * and does not call splipi/splx. Must be called with interrupts disabled.
+ * Restore the guest's FPU state, saving the existing userland thread's
+ * FPU context if necessary. Must be called with interrupts disabled.
*/
-void
-vmm_fpusave(void)
+int
+vmm_fpurestore(struct vcpu *vcpu)
{
- struct proc *p;
struct cpu_info *ci = curcpu();
- p = ci->ci_fpcurproc;
- if (p == NULL)
- return;
+ /* save vmmd's FPU state if we haven't already */
+ if (ci->ci_flags & CPUF_USERXSTATE) {
+ ci->ci_flags &= ~CPUF_USERXSTATE;
+ fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
+ }
- if (ci->ci_fpsaving != 0)
- panic("%s: recursive save!", __func__);
- /*
- * Set ci->ci_fpsaving, so that any pending exception will be
- * thrown away. (It will be caught again if/when the FPU
- * state is restored.)
- */
- ci->ci_fpsaving = 1;
- if (xsave_mask)
- xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask);
- else
- fxsave(&p->p_addr->u_pcb.pcb_savefpu);
- ci->ci_fpsaving = 0;
+ if (vcpu->vc_fpuinited) {
+ /* Restore guest XCR0 and FPU context */
+ if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
+			DPRINTF("%s: guest attempted to set invalid %s\n",
+			    __func__, "bits in xcr0");
+ return EINVAL;
+ }
- p->p_addr->u_pcb.pcb_fpcpu = NULL;
- ci->ci_fpcurproc = NULL;
+ return 0;
+}
+
+/*
+ * vmm_fpusave
+ *
+ * Save the guest's FPU state. Must be called with interrupts disabled.
+ */
+void
+vmm_fpusave(struct vcpu *vcpu)
+{
+ if (xsave_mask) {
+ /* Save guest %xcr0 */
+ vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
+
+ /* Restore host %xcr0 */
+ xsetbv(0, xsave_mask);
+ }
+
+ /*
+ * Save full copy of FPU state - guest content is always
+ * a subset of host's save area (see xsetbv exit handler)
+ */
+ fpusavereset(&vcpu->vc_g_fpu);
+ vcpu->vc_fpuinited = 1;
}
/*
@@ -3839,39 +3867,10 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
/* Disable interrupts and save the current FPU state. */
disable_intr();
- clts();
- vmm_fpusave();
-
- /* Initialize the guest FPU if not inited already */
- if (!vcpu->vc_fpuinited) {
- fninit();
- bzero(&vcpu->vc_g_fpu.fp_fxsave,
- sizeof(vcpu->vc_g_fpu.fp_fxsave));
- vcpu->vc_g_fpu.fp_fxsave.fx_fcw =
- __INITIAL_NPXCW__;
- vcpu->vc_g_fpu.fp_fxsave.fx_mxcsr =
- __INITIAL_MXCSR__;
- fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
-
- vcpu->vc_fpuinited = 1;
- }
-
- if (xsave_mask) {
- /* Restore guest XCR0 and FPU context */
- if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
- DPRINTF("%s: guest attempted to set invalid "
- "bits in xcr0\n", __func__);
- ret = EINVAL;
- stts();
- enable_intr();
- break;
- }
-
- /* Restore guest %xcr0 */
- xrstor(&vcpu->vc_g_fpu, xsave_mask);
- xsetbv(0, vcpu->vc_gueststate.vg_xcr0);
- } else
- fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+ if ((ret = vmm_fpurestore(vcpu))) {
+ enable_intr();
+ break;
+ }
KERNEL_UNLOCK();
ret = vmx_enter_guest(&vcpu->vc_control_pa,
@@ -3882,27 +3881,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
* the guest FPU state still possibly on the CPU. Save the FPU
* state before re-enabling interrupts.
*/
- if (xsave_mask) {
- /* Save guest %xcr0 */
- vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
-
- /* Restore host %xcr0 */
- xsetbv(0, xsave_mask);
-
- /*
- * Save full copy of FPU state - guest content is
- * always a subset of host's save area (see xsetbv
- * exit handler)
- */
- xsave(&vcpu->vc_g_fpu, xsave_mask);
- } else
- fxsave(&vcpu->vc_g_fpu);
-
- /*
- * FPU state is invalid, set CR0_TS to force DNA trap on next
- * access.
- */
- stts();
+ vmm_fpusave(vcpu);
enable_intr();
@@ -5715,39 +5694,10 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
/* Disable interrupts and save the current FPU state. */
disable_intr();
- clts();
- vmm_fpusave();
-
- /* Initialize the guest FPU if not inited already */
- if (!vcpu->vc_fpuinited) {
- fninit();
- bzero(&vcpu->vc_g_fpu.fp_fxsave,
- sizeof(vcpu->vc_g_fpu.fp_fxsave));
- vcpu->vc_g_fpu.fp_fxsave.fx_fcw =
- __INITIAL_NPXCW__;
- vcpu->vc_g_fpu.fp_fxsave.fx_mxcsr =
- __INITIAL_MXCSR__;
- fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
-
- vcpu->vc_fpuinited = 1;
- }
-
- if (xsave_mask) {
- /* Restore guest XCR0 and FPU context */
- if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
- DPRINTF("%s: guest attempted to set invalid "
- "bits in xcr0\n", __func__);
- ret = EINVAL;
- stts();
- enable_intr();
- break;
- }
-
- /* Restore guest %xcr0 */
- xrstor(&vcpu->vc_g_fpu, xsave_mask);
- xsetbv(0, vcpu->vc_gueststate.vg_xcr0);
- } else
- fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+ if ((ret = vmm_fpurestore(vcpu))) {
+ enable_intr();
+ break;
+ }
KERNEL_UNLOCK();
@@ -5761,27 +5711,7 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
* the guest FPU state still possibly on the CPU. Save the FPU
* state before re-enabling interrupts.
*/
- if (xsave_mask) {
- /* Save guest %xcr0 */
- vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
-
- /* Restore host %xcr0 */
- xsetbv(0, xsave_mask);
-
- /*
- * Save full copy of FPU state - guest content is
- * always a subset of host's save area (see xsetbv
- * exit handler)
- */
- xsave(&vcpu->vc_g_fpu, xsave_mask);
- } else
- fxsave(&vcpu->vc_g_fpu);
-
- /*
- * FPU state is invalid, set CR0_TS to force DNA trap on next
- * access.
- */
- stts();
+ vmm_fpusave(vcpu);
@@ -216,9 +212,9 @@ struct cpu_info {
#define CPUF_IDENTIFIED 0x0020 /* CPU has been identified */
#define CPUF_CONST_TSC 0x0040 /* CPU has constant TSC */
-#define CPUF_USERSEGS_BIT 7 /* CPU has curproc's segments */
-#define CPUF_USERSEGS (1<<CPUF_USERSEGS_BIT) /* and FS.base */
+#define CPUF_USERSEGS 0x0080 /* CPU has curproc's segs and FS.base */
#define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
+#define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
#define CPUF_PRESENT 0x1000 /* CPU is present */
#define CPUF_RUNNING 0x2000 /* CPU is running */
@@ -268,7 +264,6 @@ extern void need_resched(struct cpu_info
extern struct cpu_info *cpu_info[MAXCPUS];
/*
- * amd64 only uses the extended save/restore format used
- * by fxsave/fsrestore, to always deal with the SSE registers,
- * which are part of the ABI to pass floating point values.
- * Must be stored in memory on a 16-byte boundary.
+ * If the CPU supports xsave/xrstor then we use them so that we can provide
+ * AVX support. Otherwise we require fxsave/fxrstor, as the SSE registers
+ * are part of the ABI for passing floating point values.
+ * While fxsave/fxrstor only required 16-byte alignment for the save area,
+ * xsave/xrstor requires the save area to have 64-byte alignment.
*/
/* md_flags */
-#define MDP_USEDFPU 0x0001 /* has used the FPU */
#define MDP_IRET 0x0002 /* return via iret, not sysret */
/* (iret can restore r11 and rcx) */