untrusted comment: signature from openbsd 6.2 base secret key
RWRVWzAMgtyg7h6Z/ES+ftCrC3y4jz05b9Q4N4uIZDqQEzb7lw6vB6BGumpp3us1ydI/8HGsYSlzPUl7ai/pMISPf6LswZDJZAI=
OpenBSD 6.2 errata 017, June 24, 2018:
Intel CPUs speculatively access FPU registers even when the FPU is disabled,
so data (including AES keys) from previous contexts could be discovered
when the lazy-save approach is used. Switch to the eager-save approach.
And then rebuild and install the kernel:
KK=`sysctl -n kern.osversion | cut -d# -f1`
cd /usr/src/sys/arch/`machine`/compile/$KK
make obj
make config
make
make install
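
Conceptually, the change replaces the lazy, DNA-trap-driven FPU save with an
unconditional save and reset at every context switch. The stand-alone C sketch
below only illustrates that difference; it is not code from the patch that
follows, and the struct and function names are hypothetical.

/*
 * Hypothetical userland model of lazy vs. eager FPU switching.
 * Build with: cc -Wall sketch.c
 */
#include <stdio.h>
#include <string.h>

struct fpu_regs {                       /* stand-in for the XMM/FPU register file */
	unsigned char xmm0[16];
};

struct pcb {                            /* stand-in for the per-process save area */
	struct fpu_regs pcb_savefpu;
};

static struct fpu_regs cpu_fpu;         /* registers currently live in the CPU */

/*
 * Lazy policy: leave the old contents in the CPU and rely on a later
 * DNA (#NM) trap to save/restore.  Until that trap fires, the previous
 * process' data sits in the register file, where speculative access
 * can observe it.
 */
static void
switch_lazy(struct pcb *oldp, struct pcb *newp)
{
	(void)oldp;
	(void)newp;
}

/*
 * Eager policy (this erratum): save the old state and reset the
 * register file on every switch, so nothing from the old context survives.
 */
static void
switch_eager(struct pcb *oldp, struct pcb *newp)
{
	oldp->pcb_savefpu = cpu_fpu;            /* fxsave/xsave */
	memset(&cpu_fpu, 0, sizeof(cpu_fpu));   /* load a clean state */
	(void)newp;     /* new proc's state is reloaded on return to userspace */
}

int
main(void)
{
	struct pcb a, b;

	memset(&a, 0, sizeof(a));
	memset(&b, 0, sizeof(b));

	memcpy(cpu_fpu.xmm0, "SECRET-AES-KEY!", 16);    /* process A's data */
	switch_lazy(&a, &b);
	printf("lazy : CPU still holds \"%s\"\n", (const char *)cpu_fpu.xmm0);

	memcpy(cpu_fpu.xmm0, "SECRET-AES-KEY!", 16);
	switch_eager(&a, &b);
	printf("eager: CPU holds \"%s\"\n", (const char *)cpu_fpu.xmm0);
	return 0;
}

With the eager policy nothing from the previous context remains in the register
file after a switch, which closes the speculative window the patch below addresses.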
/*
- * We do lazy initialization and switching using the TS bit in cr0 and the
- * MDP_USEDFPU bit in mdproc.
- *
- * DNA exceptions are handled like this:
- *
- * 1) If there is no FPU, return and go to the emulator.
- * 2) If someone else has used the FPU, save its state into that process' PCB.
- * 3a) If MDP_USEDFPU is not set, set it and initialize the FPU.
- * 3b) Otherwise, reload the process' previous FPU state.
- *
- * When a process is created or exec()s, its saved cr0 image has the TS bit
- * set and the MDP_USEDFPU bit clear. The MDP_USEDFPU bit is set when the
- * process first gets a DNA and the FPU is initialized. The TS bit is turned
- * off when the FPU is used, and turned on again later when the process' FPU
- * state is saved.
- */
-
-/*
* The mask of enabled XSAVE features.
*/
uint64_t xsave_mask;
-void fpudna(struct cpu_info *, struct trapframe *);
static int x86fpflags_to_siginfo(u_int32_t);
-#ifdef DIAGNOSTIC
- /*
- * At this point, fpcurproc should be curproc. If it wasn't,
- * the TS bit should be set, and we should have gotten a DNA exception.
- */
- if (p != curproc)
- panic("fputrap: wrong proc");
-#endif
+ KASSERT(ci->ci_flags & CPUF_USERXSTATE);
+ ci->ci_flags &= ~CPUF_USERXSTATE;
+ fpusavereset(sfp);
-/*
- * Implement device not available (DNA) exception
- *
- * If we were the last process to use the FPU, we can simply return.
- * Otherwise, we save the previous state, if necessary, and restore our last
- * saved state.
- */
-void
-fpudna(struct cpu_info *ci, struct trapframe *frame)
-{
- struct savefpu *sfp;
- struct proc *p;
- int s;
-
- if (ci->ci_fpsaving) {
- printf("recursive fpu trap; cr0=%x\n", rcr0());
- return;
- }
-
- s = splipi();
-
-#ifdef MULTIPROCESSOR
- p = ci->ci_curproc;
-#else
- p = curproc;
-#endif
-
- /*
- * Initialize the FPU state to clear any exceptions. If someone else
- * was using the FPU, save their state.
- */
- if (ci->ci_fpcurproc != NULL && ci->ci_fpcurproc != p) {
- fpusave_cpu(ci, ci->ci_fpcurproc != &proc0);
- uvmexp.fpswtch++;
- }
- splx(s);
-
- if (p == NULL) {
- clts();
- return;
- }
-
- KDASSERT(ci->ci_fpcurproc == NULL);
-#ifndef MULTIPROCESSOR
- KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
-#else
- if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
- fpusave_proc(p, 1);
-#endif
-
- p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
- clts();
-
- s = splipi();
- ci->ci_fpcurproc = p;
- p->p_addr->u_pcb.pcb_fpcpu = ci;
- splx(s);
-
- sfp = &p->p_addr->u_pcb.pcb_savefpu;
-
- if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
- fninit();
- bzero(&sfp->fp_fxsave, sizeof(sfp->fp_fxsave));
- sfp->fp_fxsave.fx_fcw = __INITIAL_NPXCW__;
- sfp->fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
- fxrstor(&sfp->fp_fxsave);
- p->p_md.md_flags |= MDP_USEDFPU;
- } else {
- if (xsave_mask) {
- if (xrstor_user(sfp, xsave_mask)) {
- fpusave_proc(p, 0); /* faulted */
- frame->tf_trapno = T_PROTFLT;
- trap(frame);
- return;
- }
- } else {
- static double zero = 0.0;
-
- /*
- * amd fpu does not restore fip, fdp, fop on fxrstor
- * thus leaking other process's execution history.
- */
- fnclex();
- __asm volatile("ffree %%st(7)\n\tfldl %0" : : "m" (zero));
- fxrstor(sfp);
- }
- }
-}
-
-
-void
-fpusave_cpu(struct cpu_info *ci, int save)
-{
- struct proc *p;
- int s;
-
- KDASSERT(ci == curcpu());
-
- p = ci->ci_fpcurproc;
- if (p == NULL)
- return;
-
- if (save) {
-#ifdef DIAGNOSTIC
- if (ci->ci_fpsaving != 0)
- panic("fpusave_cpu: recursive save!");
-#endif
- /*
- * Set ci->ci_fpsaving, so that any pending exception will be
- * thrown away. (It will be caught again if/when the FPU
- * state is restored.)
- */
- clts();
- ci->ci_fpsaving = 1;
- if (xsave_mask)
- xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask);
- else
- fxsave(&p->p_addr->u_pcb.pcb_savefpu);
- ci->ci_fpsaving = 0;
- }
-
- stts();
- p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
-
- s = splipi();
- p->p_addr->u_pcb.pcb_fpcpu = NULL;
- ci->ci_fpcurproc = NULL;
- splx(s);
-}
-
-/*
- * Save p's FPU state, which may be on this processor or another processor.
- */
-void
-fpusave_proc(struct proc *p, int save)
-{
- struct cpu_info *ci = curcpu();
- struct cpu_info *oci;
-
- KDASSERT(p->p_addr != NULL);
-
- oci = p->p_addr->u_pcb.pcb_fpcpu;
- if (oci == NULL)
- return;
-
-#if defined(MULTIPROCESSOR)
- if (oci == ci) {
- int s = splipi();
- fpusave_cpu(ci, save);
- splx(s);
- } else {
- oci->ci_fpsaveproc = p;
- x86_send_ipi(oci,
- save ? X86_IPI_SYNCH_FPU : X86_IPI_FLUSH_FPU);
- while (p->p_addr->u_pcb.pcb_fpcpu != NULL)
- CPU_BUSY_CYCLE();
- }
-#else
- KASSERT(ci->ci_fpcurproc == p);
- fpusave_cpu(ci, save);
-#endif
-}
-
void
fpu_kernel_enter(void)
{
- struct cpu_info *ci = curcpu();
- uint32_t cw;
- int s;
-
- /*
- * Fast path. If the kernel was using the FPU before, there
- * is no work to do besides clearing TS.
- */
- if (ci->ci_fpcurproc == &proc0) {
- clts();
- return;
- }
-
- s = splipi();
+ struct cpu_info *ci = curcpu();
- if (ci->ci_fpcurproc != NULL) {
- fpusave_cpu(ci, 1);
- uvmexp.fpswtch++;
+ /* save curproc's FPU state if we haven't already */
+ if (ci->ci_flags & CPUF_USERXSTATE) {
+ ci->ci_flags &= ~CPUF_USERXSTATE;
+ fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
}
-
- /* Claim the FPU */
- ci->ci_fpcurproc = &proc0;
-
- splx(s);
-
- /* Disable DNA exceptions */
- clts();
-
- /* Initialize the FPU */
- fninit();
- cw = __INITIAL_NPXCW__;
- fldcw(&cw);
- cw = __INITIAL_MXCSR__;
- ldmxcsr(&cw);
}
void
fpu_kernel_exit(void)
{
- /* Enable DNA exceptions */
- stts();
+ /* make sure we don't leave anything in the registers */
+ fpureset();
}
Index: sys/arch/amd64/amd64/genassym.cf
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/genassym.cf,v
retrieving revision 1.31.8.1
diff -u -p -r1.31.8.1 genassym.cf
--- sys/arch/amd64/amd64/genassym.cf 26 Feb 2018 12:29:48 -0000 1.31.8.1
+++ sys/arch/amd64/amd64/genassym.cf 21 Jun 2018 11:54:01 -0000
@@ -94,9 +94,8 @@ member pcb_rbp
member pcb_kstack
member pcb_fsbase
member pcb_onfault
-member pcb_fpcpu
member pcb_pmap
-member pcb_cr0
+member pcb_savefpu
struct pmap
member pm_cpus
@@ -131,7 +130,8 @@ member CPU_INFO_USER_CR3 ci_user_cr3
member CPU_INFO_KERN_RSP ci_kern_rsp
member CPU_INFO_INTR_RSP ci_intr_rsp
- movl CPUVAR(CPUID),%edi
+ movl CPUVAR(CPUID),%r9d
+
+ /* for the FPU/"extended CPU state" handling below */
+ movq xsave_mask(%rip),%rdx
+ movl %edx,%eax
+ shrq $32,%rdx
/* If old proc exited, don't bother. */
testq %r13,%r13
@@ -358,7 +364,7 @@ ENTRY(cpu_switchto)
* %rax, %rcx - scratch
* %r13 - old proc, then old pcb
* %r12 - new proc
- * %edi - cpuid
+ * %r9d - cpuid
*/
movq P_ADDR(%r13),%r13
@@ -366,16 +372,46 @@ ENTRY(cpu_switchto)
/* clear the old pmap's bit for the cpu */
movq PCB_PMAP(%r13),%rcx
lock
- btrq %rdi,PM_CPUS(%rcx)
+ btrq %r9,PM_CPUS(%rcx)
/* Save stack pointers. */
movq %rsp,PCB_RSP(%r13)
movq %rbp,PCB_RBP(%r13)
+ /*
+ * If the old proc ran in userspace then save the
+ * floating-point/"extended state" registers
+ */
+ testl $CPUF_USERXSTATE,CPUVAR(FLAGS)
+ jz .Lxstate_reset
+
+ movq %r13, %rdi
+#if PCB_SAVEFPU != 0
+ addq $PCB_SAVEFPU,%rdi
+#endif
+ CODEPATCH_START
+ .byte 0x48; fxsave (%rdi) /* really fxsave64 */
+ CODEPATCH_END(CPTAG_XSAVE)
+
switch_exited:
- /* did old proc run in userspace? then reset the segment regs */
- btrl $CPUF_USERSEGS_BIT, CPUVAR(FLAGS)
- jnc restore_saved
+ /* now clear the xstate */
+ movq proc0paddr(%rip),%rdi
+#if PCB_SAVEFPU != 0
+ addq $PCB_SAVEFPU,%rdi
+#endif
+ CODEPATCH_START
+ .byte 0x48; fxrstor (%rdi) /* really fxrstor64 */
+ CODEPATCH_END(CPTAG_XRSTOR)
+ andl $~CPUF_USERXSTATE,CPUVAR(FLAGS)
+
+.Lxstate_reset:
+ /*
+ * If the segment registers haven't been reset since the old proc
+ * ran in userspace then reset them now
+ */
+ testl $CPUF_USERSEGS,CPUVAR(FLAGS)
+ jz restore_saved
+ andl $~CPUF_USERSEGS,CPUVAR(FLAGS)
/* set %ds, %es, %fs, and %gs to expected value to prevent info leak */
movw $(GSEL(GUDATA_SEL, SEL_UPL)),%ax
@@ -432,32 +468,17 @@ restore_saved:
0:
/* set the new pmap's bit for the cpu */
- movl CPUVAR(CPUID),%edi
lock
- btsq %rdi,PM_CPUS(%rcx)
+ btsq %r9,PM_CPUS(%rcx)
#ifdef DIAGNOSTIC
jc _C_LABEL(switch_pmcpu_set)
#endif
@@ -529,7 +550,7 @@ IDTVEC(syscall)
* %rip and the original rflags has been copied to %r11. %cs and
* %ss have been updated to the kernel segments, but %rsp is still
* the user-space value.
- * First order of business is to swap to the kernel gs.base so that
+ * First order of business is to swap to the kernel GS.base so that
* we can access our struct cpu_info and use the scratch space there
* to switch to the kernel page tables (thank you, Intel), then
* switch to our kernel stack. Once that's in place we can
@@ -563,7 +584,7 @@ NENTRY(Xsyscall_untramp)
movq %r11, TF_RFLAGS(%rsp) /* old rflags from syscall insn */
movq $(GSEL(GUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
movq %rcx,TF_RIP(%rsp)
- movq $2,TF_ERR(%rsp) /* ignored */
+ movq %rax,TF_ERR(%rsp) /* stash syscall # for SPL check */
movq CPUVAR(CURPROC),%r14
movq %rsp,P_MD_REGS(%r14) # save pointer to frame
@@ -590,8 +611,17 @@ NENTRY(Xsyscall_untramp)
/* Could registers have been changed that require an iretq? */
testl $MDP_IRET, P_MD_FLAGS(%r14)
- jne intr_fast_exit
+ jne intr_user_exit_post_ast
+
+ /* Restore FPU/"extended CPU state" if it's not already in the CPU */
+ testl $CPUF_USERXSTATE,CPUVAR(FLAGS)
+ jz .Lsyscall_restore_xstate
+
+ /* Restore FS.base if it's not already in the CPU */
+ testl $CPUF_USERSEGS,CPUVAR(FLAGS)
+ jz .Lsyscall_restore_fsbase
- /* Restore FS.base if it's not already in the CPU */
- btsl $CPUF_USERSEGS_BIT,CPUVAR(FLAGS)
- jc 99f
- movq CPUVAR(CURPCB),%rdx
- movq PCB_FSBASE(%rdx),%rax
- movq %rax,%rdx
- shrq $32,%rdx
- movl $MSR_FSBASE,%ecx
- wrmsr
-99:
-
/*
* We need to finish reading from the trapframe, then switch
* to the user page tables, swapgs, and return. We need
@@ -642,11 +661,42 @@ KUENTRY(syscall_trampback)
sysretq
/*
- * Return via iretq, for real interrupts and signal returns
+ * Returning to userspace via iretq. We do things in this order:
+ * - check for ASTs
+ * - restore FPU/"extended CPU state" if it's not already in the CPU
+ * - DIAGNOSTIC: no more C calls after this, so check the SPL
+ * - restore FS.base if it's not already in the CPU
+ * - restore most registers
+ * - update the iret frame from the trapframe
+ * - finish reading from the trapframe
+ * - switch to the trampoline stack \
+ * - jump to the .kutext segment |-- Meltdown workaround
+ * - switch to the user page tables /
+ * - swapgs
+ * - iretq
*/
-NENTRY(intr_fast_exit)
+NENTRY(intr_user_exit)
#ifdef DIAGNOSTIC
pushfq
popq %rdx
testq $PSL_I,%rdx
- jnz .Lintr_exit_not_blocked
+ jnz .Lintr_user_exit_not_blocked
+#endif /* DIAGNOSTIC */
+
+ /* Check for ASTs */
+ CHECK_ASTPENDING(%r11)
+ je intr_user_exit_post_ast
+ CLEAR_ASTPENDING(%r11)
+ sti
+ movq %rsp,%rdi
+ call _C_LABEL(ast)
+ cli
+ jmp intr_user_exit
+
+intr_user_exit_post_ast:
+ /* Restore FPU/"extended CPU state" if it's not already in the CPU */
+ testl $CPUF_USERXSTATE,CPUVAR(FLAGS)
+ jz .Lintr_restore_xstate
+
+#ifdef DIAGNOSTIC
+ /* no more C calls after this, so check the SPL */
+ cmpl $0,CPUVAR(ILEVEL)
+ jne .Luser_spl_not_lowered
#endif /* DIAGNOSTIC */
+
+ /* Restore FS.base if it's not already in the CPU */
+ testl $CPUF_USERSEGS,CPUVAR(FLAGS)
+ jz .Lintr_restore_fsbase
+
+.Lintr_restore_registers:
movq TF_RDI(%rsp),%rdi
movq TF_RSI(%rsp),%rsi
movq TF_R8(%rsp),%r8
@@ -697,30 +786,7 @@ NENTRY(intr_fast_exit)
movq TF_RBP(%rsp),%rbp
movq TF_RBX(%rsp),%rbx
- testq $SEL_RPL,TF_CS(%rsp)
- je intr_exit_recurse /* returning back to kernel? */
-
- /* returning to userspace. XXX fix up iret frame here */
-
- /* restore FS.base if it's not already in the CPU */
- btsl $CPUF_USERSEGS_BIT,CPUVAR(FLAGS)
- jc 99f
- movq CPUVAR(CURPCB),%rdx /* for below */
- movq PCB_FSBASE(%rdx),%rax
- movq %rax,%rdx
- shrq $32,%rdx
- movl $MSR_FSBASE,%ecx
- wrmsr
-99:
/*
- * Returning to userspace. We need to go things in this order:
- * - update the iret frame from the trapframe
- * - finish reading from the trapframe
- * - switch to the trampoline stack
- * - jump to the .kutext segment
- * - switch to the user page tables
- * - swapgs
- * - iretq
* To get the final value for the register that was used
* for the mov to %cr3, we need access to somewhere accessible
* on the user page tables, so we save it in CPUVAR(SCRATCH)
@@ -758,7 +824,101 @@ KUENTRY(iretq_tramp)
_C_LABEL(doreti_iret):
iretq
-/*
- * Set up TSS for a new PCB.
- */
-
-#ifdef MULTIPROCESSOR
-void
-x86_64_init_pcb_tss_ldt(struct cpu_info *ci)
-{
- struct pcb *pcb = ci->ci_idle_pcb;
-
- pcb->pcb_cr0 = rcr0();
-}
-#endif /* MULTIPROCESSOR */
-
bios_diskinfo_t *
bios_getdiskinfo(dev_t dev)
{
@@ -579,6 +564,7 @@ sendsig(sig_t catcher, int sig, int mask
struct trapframe *tf = p->p_md.md_regs;
struct sigacts *psp = p->p_p->ps_sigacts;
struct sigcontext ksc;
+ struct savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu;
siginfo_t ksi;
register_t sp, scp, sip;
u_long sss;
@@ -597,17 +583,19 @@ sendsig(sig_t catcher, int sig, int mask
sp &= ~15ULL; /* just in case */
sss = (sizeof(ksc) + 15) & ~15;
- if (p->p_md.md_flags & MDP_USEDFPU) {
- fpusave_proc(p, 1);
- sp -= fpu_save_len;
- ksc.sc_fpstate = (struct fxsave64 *)sp;
- if (copyout(&p->p_addr->u_pcb.pcb_savefpu.fp_fxsave,
- (void *)sp, fpu_save_len))
- sigexit(p, SIGILL);
+ /* Save FPU state to PCB if necessary, then copy it out */
+ if (curcpu()->ci_flags & CPUF_USERXSTATE) {
+ curcpu()->ci_flags &= ~CPUF_USERXSTATE;
+ fpusavereset(&p->p_addr->u_pcb.pcb_savefpu);
+ }
+ sp -= fpu_save_len;
+ ksc.sc_fpstate = (struct fxsave64 *)sp;
+ if (copyout(sfp, (void *)sp, fpu_save_len))
+ sigexit(p, SIGILL);
- /* Signal handlers get a completely clean FP state */
- p->p_md.md_flags &= ~MDP_USEDFPU;
- }
+ /* Now reset the FPU state in PCB */
+ memcpy(&p->p_addr->u_pcb.pcb_savefpu,
+ &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len);
sip = 0;
if (psp->ps_siginfo & sigmask(sig)) {
@@ -637,6 +625,9 @@ sendsig(sig_t catcher, int sig, int mask
tf->tf_rflags &= ~(PSL_T|PSL_D|PSL_VM|PSL_AC);
tf->tf_rsp = scp;
tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
+
+ /* The reset state _is_ the userspace state for this thread now */
+ curcpu()->ci_flags |= CPUF_USERXSTATE;
}
- if (p->p_md.md_flags & MDP_USEDFPU)
- fpusave_proc(p, 0);
+ /* Current state is obsolete; toss it and force a reload */
+ if (curcpu()->ci_flags & CPUF_USERXSTATE) {
+ curcpu()->ci_flags &= ~CPUF_USERXSTATE;
+ fpureset();
+ }
- if (ksc.sc_fpstate) {
+ /* Copy in the FPU state to restore */
+ if (__predict_true(ksc.sc_fpstate != NULL)) {
struct fxsave64 *fx = &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave;
ksc.sc_trapno = tf->tf_trapno;
@@ -707,6 +705,7 @@ sys_sigreturn(struct proc *p, void *v, r
* when a signal was being delivered, the process will be
* completely restored, including the userland %rcx and %r11
* registers which the 'sysretq' instruction cannot restore.
+ * Also need to make sure we can handle faulting on xrstor.
*/
p->p_md.md_flags |= MDP_IRET;
- /* If we were using the FPU, forget about it. */
- if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
- fpusave_proc(p, 0);
- p->p_md.md_flags &= ~MDP_USEDFPU;
+ /* Reset FPU state in PCB */
+ memcpy(&p->p_addr->u_pcb.pcb_savefpu,
+ &proc0.p_addr->u_pcb.pcb_savefpu, fpu_save_len);
+
+ if (curcpu()->ci_flags & CPUF_USERXSTATE) {
+ /* state in CPU is obsolete; reset it */
+ fpureset();
+ } else {
+ /* the reset state _is_ the userspace state now */
+ curcpu()->ci_flags |= CPUF_USERXSTATE;
+ }
+
+ /* To reset all registers we have to return via iretq */
p->p_md.md_flags |= MDP_IRET;
- /*
- * If iretq faults, we'll get a trap at doreti_iret with CPL==0 but
- * the user's GS.base, which INTRENTRY wouldn't handle correctly
- * (it would skip the swapgs), so locally expand both it and
- * INTR_SAVE_GPRS, but add an extra test comparing %rip to doreti_iret
- * so that we can do the necessary swapgs in that case.
- */
+/*
+ * The #GP (general protection fault) handler has a couple weird cases
+ * to handle:
+ * - trapping in iretq to userspace and
+ * - trapping in xrstor in the kernel.
+ * We detect both of these by examining the %rip in the iretq_frame.
+ * Handling them is done by updating %rip in the iretq_frame to point
+ * to a stub handler of some sort and then iretq'ing to it. For the
+ * iretq fault we resume in a stub which acts like we got a fresh #GP.
+ * For the xrstor fault we resume to a stub which returns an error to
+ * the routine that requested the xrstor.
+ */
IDTVEC(trap0d)
+ pushq %rdx
pushq %rcx
- leaq _C_LABEL(doreti_iret)(%rip),%rcx
- cmpq %rcx,16(%rsp) /* over %rcx and err to %rip */
+ movq 24(%rsp),%rdx /* over %r[cd]x and err to %rip */
+ leaq doreti_iret(%rip),%rcx
+ cmpq %rcx,%rdx
+ je .Lhandle_doreti
+ leaq xrstor_fault(%rip),%rcx
+ cmpq %rcx,%rdx
+ je .Lhandle_xrstor
popq %rcx
- je 1f
- testq $SEL_RPL,16(%rsp) /* over err and %rip to %cs */
- je INTRENTRY_LABEL(trap0d)
-1: swapgs
- movq %rax,CPUVAR(SCRATCH)
- movq CPUVAR(KERN_CR3),%rax
- testq %rax,%rax
- jz 98f
- movq %rax,%cr3
- jmp 98f
- .text
- .globl INTRENTRY_LABEL(trap0d)
-INTRENTRY_LABEL(trap0d): /* from kernel */
- pushq $T_PROTFLT
- subq $152,%rsp
- movq %rcx,TF_RCX(%rsp)
- jmp 99f
-98: /* from userspace */
- movq CPUVAR(KERN_RSP),%rax
- xchgq %rax,%rsp
- movq %rcx,TF_RCX(%rsp)
- /* set trapno in the trap frame */
- movq $T_PROTFLT,TF_TRAPNO(%rsp)
- /* copy err and iretq frame to the trap frame */
- movq 0(%rax),%rcx
- movq %rcx,TF_ERR(%rsp)
- add $8,%rax
- movq IRETQ_RIP(%rax),%rcx
- movq %rcx,TF_RIP(%rsp)
- movq IRETQ_CS(%rax),%rcx
- movq %rcx,TF_CS(%rsp)
- movq IRETQ_RFLAGS(%rax),%rcx
- movq %rcx,TF_RFLAGS(%rsp)
- movq IRETQ_RSP(%rax),%rcx
- movq %rcx,TF_RSP(%rsp)
- movq IRETQ_SS(%rax),%rcx
- movq %rcx,TF_SS(%rsp)
- movq CPUVAR(SCRATCH),%rax
-99: INTR_SAVE_MOST_GPRS_NO_ADJ
- sti
- jmp calltrap
+ popq %rdx
+ TRAP(T_PROTFLT)
+
+.Lhandle_xrstor:
+ /* xrstor faulted; just resume in xrstor_resume */
+ leaq xrstor_resume(%rip),%rcx
+ jmp 1f
+
+.Lhandle_doreti:
+ /* iretq faulted; resume in a stub that acts like we got a #GP */
+ leaq .Lhandle_doreti_resume(%rip),%rcx
+1: movq %rcx,24(%rsp) /* over %r[cd]x and err to %rip */
+ popq %rcx
+ popq %rdx
+ addq $8,%rsp /* pop the err code */
+ jmp doreti_iret
+.Lhandle_doreti_resume:
+ ZTRAP(T_PROTFLT)
/*
- * If an error is detected during trap, syscall, or interrupt exit, trap() will
- * change %rip to point to this label. At that point, we'll be running with
- * the kernel GS.base, but the trap frame will be from CPL==3, so we can't
- * go through INTRENTRY as it would do the swapgs that we don't want/need.
- * So, locally expand INTRENTRY but without the swapgs: manually
- * clean up the stack and resume as if we were handling a general
- * protection fault. This will cause the process to get a SIGBUS.
- */
-NENTRY(resume_iret)
- movq %rax,CPUVAR(SCRATCH)
- movq CPUVAR(KERN_CR3),%rax
- testq %rax,%rax
- jz INTRENTRY_LABEL(iret)
- movq %rax,%cr3
- jmp INTRENTRY_LABEL(iret)
- .text
- .globl INTRENTRY_LABEL(iret)
-INTRENTRY_LABEL(iret): /* from kernel */
- movq CPUVAR(KERN_RSP),%rax
- xchgq %rax,%rsp
- movq %rcx,TF_RCX(%rsp)
- /* set trapno+err in the trap frame */
- movq $T_PROTFLT,TF_TRAPNO(%rsp)
- movq $0,TF_ERR(%rsp)
- /* copy iretq frame to the trap frame */
- movq IRETQ_RIP(%rax),%rcx
- movq %rcx,TF_RIP(%rsp)
- movq IRETQ_CS(%rax),%rcx
- movq %rcx,TF_CS(%rsp)
- movq IRETQ_RFLAGS(%rax),%rcx
- movq %rcx,TF_RFLAGS(%rsp)
- movq IRETQ_RSP(%rax),%rcx
- movq %rcx,TF_RSP(%rsp)
- movq IRETQ_SS(%rax),%rcx
- movq %rcx,TF_SS(%rsp)
- movq CPUVAR(SCRATCH),%rax
- INTR_SAVE_MOST_GPRS_NO_ADJ
- sti
- jmp calltrap
-
-
-/*
* All traps go through here. Call the generic trap handler, and
* check for ASTs afterwards.
*/
KUENTRY(alltraps)
INTRENTRY(alltraps)
sti
-calltrap:
cld
SMAP_CLAC
#ifdef DIAGNOSTIC
@@ -376,19 +311,14 @@ calltrap:
jz 2f
.Lreal_trap:
#endif /* !defined(GPROF) && defined(DDBPROF) */
+ .globl recall_trap
+recall_trap:
movq %rsp, %rdi
call _C_LABEL(trap)
2: /* Check for ASTs on exit to user mode. */
cli
- CHECK_ASTPENDING(%r11)
- je 1f
testb $SEL_RPL,TF_CS(%rsp)
- jz 1f
-5: CLEAR_ASTPENDING(%r11)
- sti
- movq %rsp, %rdi
- call _C_LABEL(ast)
- jmp 2b
+ jnz intr_user_exit
#ifndef DIAGNOSTIC
1: INTRFASTEXIT
#else /* DIAGNOSTIC */
@@ -396,7 +326,7 @@ calltrap:
jne 3f
INTRFASTEXIT
3: sti
- movabsq $spl_lowered,%rdi
+ leaq spl_lowered(%rip),%rdi
movl CPUVAR(ILEVEL),%esi
movl %ebx,%edx
xorq %rax,%rax
@@ -601,7 +531,6 @@ KIDTVEC(resume_xen_upcall)
2:
movq $(1 << LIR_XEN),%rax
orq %rax,CPUVAR(IPENDING)
-3:
INTRFASTEXIT
#endif /* NXEN > 0 */
@@ -521,14 +514,8 @@ void
viac3_rnd(void *v)
{
struct timeout *tmo = v;
- unsigned int *p, i, rv, creg0, len = VIAC3_RNG_BUFSIZ;
+ unsigned int *p, i, rv, len = VIAC3_RNG_BUFSIZ;
static int buffer[VIAC3_RNG_BUFSIZ + 2]; /* XXX why + 2? */
-#ifdef MULTIPROCESSOR
- int s = splipi();
-#endif
-
- creg0 = rcr0(); /* Permit access to SIMD/FPU path */
- lcr0(creg0 & ~(CR0_EM|CR0_TS));
/*
* Here we collect the random data from the VIA C3 RNG. We make
@@ -538,12 +525,6 @@ viac3_rnd(void *v)
__asm volatile("rep xstorerng"
: "=a" (rv) : "d" (3), "D" (buffer), "c" (len*sizeof(int))
: "memory", "cc");
-
- lcr0(creg0);
-
-#ifdef MULTIPROCESSOR
- splx(s);
-#endif
- /*
- * If fpuproc != p1, then the fpu h/w state is irrelevant and the
- * state had better already be in the pcb. This is true for forks
- * but not for dumps.
- *
- * If fpuproc == p1, then we have to save the fpu h/w state to
- * p1's pcb so that we can copy it.
- */
- if (p1->p_addr->u_pcb.pcb_fpcpu != NULL)
- fpusave_proc(p1, 1);
+ /* Save the fpu h/w state to p1's pcb so that we can copy it. */
+ fpusave(&pcb1->pcb_savefpu);
/*
* Activate the address space.
@@ -137,11 +130,6 @@ cpu_fork(struct proc *p1, struct proc *p
void
cpu_exit(struct proc *p)
{
-
- /* If we were using the FPU, forget about it. */
- if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
- fpusave_proc(p, 0);
-
pmap_deactivate(p);
sched_exit(p);
}
Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.170
diff -u -p -r1.170 vmm.c
--- sys/arch/amd64/amd64/vmm.c 8 Sep 2017 05:36:51 -0000 1.170
+++ sys/arch/amd64/amd64/vmm.c 21 Jun 2018 11:54:01 -0000
@@ -3584,39 +3584,67 @@ vcpu_must_stop(struct vcpu *vcpu)
}
/*
- * vmm_fpusave
+ * vmm_fpurestore
*
- * Modified version of fpusave_cpu from fpu.c that only saves the FPU context
- * and does not call splipi/splx. Must be called with interrupts disabled.
+ * Restore the guest's FPU state, saving the existing userland thread's
+ * FPU context if necessary. Must be called with interrupts disabled.
*/
-void
-vmm_fpusave(void)
+int
+vmm_fpurestore(struct vcpu *vcpu)
{
- struct proc *p;
struct cpu_info *ci = curcpu();
- p = ci->ci_fpcurproc;
- if (p == NULL)
- return;
+ /* save vmmd's FPU state if we haven't already */
+ if (ci->ci_flags & CPUF_USERXSTATE) {
+ ci->ci_flags &= ~CPUF_USERXSTATE;
+ fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
+ }
- if (ci->ci_fpsaving != 0)
- panic("%s: recursive save!", __func__);
- /*
- * Set ci->ci_fpsaving, so that any pending exception will be
- * thrown away. (It will be caught again if/when the FPU
- * state is restored.)
- */
- ci->ci_fpsaving = 1;
- if (xsave_mask)
- xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask);
- else
- fxsave(&p->p_addr->u_pcb.pcb_savefpu);
- ci->ci_fpsaving = 0;
+ if (vcpu->vc_fpuinited) {
+ /* Restore guest XCR0 and FPU context */
+ if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
+			DPRINTF("%s: guest attempted to set invalid %s\n",
+			    __func__, "bits in xcr0");
+ return EINVAL;
+ }
- p->p_addr->u_pcb.pcb_fpcpu = NULL;
- ci->ci_fpcurproc = NULL;
+ return 0;
+}
+
+/*
+ * vmm_fpusave
+ *
+ * Save the guest's FPU state. Must be called with interrupts disabled.
+ */
+void
+vmm_fpusave(struct vcpu *vcpu)
+{
+ if (xsave_mask) {
+ /* Save guest %xcr0 */
+ vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
+
+ /* Restore host %xcr0 */
+ xsetbv(0, xsave_mask);
+ }
+
+ /*
+ * Save full copy of FPU state - guest content is always
+ * a subset of host's save area (see xsetbv exit handler)
+ */
+ fpusavereset(&vcpu->vc_g_fpu);
+ vcpu->vc_fpuinited = 1;
}
/*
@@ -3839,39 +3867,10 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
/* Disable interrupts and save the current FPU state. */
disable_intr();
- clts();
- vmm_fpusave();
-
- /* Initialize the guest FPU if not inited already */
- if (!vcpu->vc_fpuinited) {
- fninit();
- bzero(&vcpu->vc_g_fpu.fp_fxsave,
- sizeof(vcpu->vc_g_fpu.fp_fxsave));
- vcpu->vc_g_fpu.fp_fxsave.fx_fcw =
- __INITIAL_NPXCW__;
- vcpu->vc_g_fpu.fp_fxsave.fx_mxcsr =
- __INITIAL_MXCSR__;
- fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
-
- vcpu->vc_fpuinited = 1;
- }
-
- if (xsave_mask) {
- /* Restore guest XCR0 and FPU context */
- if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
- DPRINTF("%s: guest attempted to set invalid "
- "bits in xcr0\n", __func__);
- ret = EINVAL;
- stts();
- enable_intr();
- break;
- }
-
- /* Restore guest %xcr0 */
- xrstor(&vcpu->vc_g_fpu, xsave_mask);
- xsetbv(0, vcpu->vc_gueststate.vg_xcr0);
- } else
- fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+ if ((ret = vmm_fpurestore(vcpu))) {
+ enable_intr();
+ break;
+ }
KERNEL_UNLOCK();
ret = vmx_enter_guest(&vcpu->vc_control_pa,
@@ -3882,27 +3881,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
* the guest FPU state still possibly on the CPU. Save the FPU
* state before re-enabling interrupts.
*/
- if (xsave_mask) {
- /* Save guest %xcr0 */
- vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
-
- /* Restore host %xcr0 */
- xsetbv(0, xsave_mask);
-
- /*
- * Save full copy of FPU state - guest content is
- * always a subset of host's save area (see xsetbv
- * exit handler)
- */
- xsave(&vcpu->vc_g_fpu, xsave_mask);
- } else
- fxsave(&vcpu->vc_g_fpu);
-
- /*
- * FPU state is invalid, set CR0_TS to force DNA trap on next
- * access.
- */
- stts();
+ vmm_fpusave(vcpu);
enable_intr();
@@ -5715,39 +5694,10 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
/* Disable interrupts and save the current FPU state. */
disable_intr();
- clts();
- vmm_fpusave();
-
- /* Initialize the guest FPU if not inited already */
- if (!vcpu->vc_fpuinited) {
- fninit();
- bzero(&vcpu->vc_g_fpu.fp_fxsave,
- sizeof(vcpu->vc_g_fpu.fp_fxsave));
- vcpu->vc_g_fpu.fp_fxsave.fx_fcw =
- __INITIAL_NPXCW__;
- vcpu->vc_g_fpu.fp_fxsave.fx_mxcsr =
- __INITIAL_MXCSR__;
- fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
-
- vcpu->vc_fpuinited = 1;
- }
-
- if (xsave_mask) {
- /* Restore guest XCR0 and FPU context */
- if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
- DPRINTF("%s: guest attempted to set invalid "
- "bits in xcr0\n", __func__);
- ret = EINVAL;
- stts();
- enable_intr();
- break;
- }
-
- /* Restore guest %xcr0 */
- xrstor(&vcpu->vc_g_fpu, xsave_mask);
- xsetbv(0, vcpu->vc_gueststate.vg_xcr0);
- } else
- fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+ if ((ret = vmm_fpurestore(vcpu))) {
+ enable_intr();
+ break;
+ }
KERNEL_UNLOCK();
@@ -5761,27 +5711,7 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
* the guest FPU state still possibly on the CPU. Save the FPU
* state before re-enabling interrupts.
*/
- if (xsave_mask) {
- /* Save guest %xcr0 */
- vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
-
- /* Restore host %xcr0 */
- xsetbv(0, xsave_mask);
-
- /*
- * Save full copy of FPU state - guest content is
- * always a subset of host's save area (see xsetbv
- * exit handler)
- */
- xsave(&vcpu->vc_g_fpu, xsave_mask);
- } else
- fxsave(&vcpu->vc_g_fpu);
-
- /*
- * FPU state is invalid, set CR0_TS to force DNA trap on next
- * access.
- */
- stts();
+ vmm_fpusave(vcpu);
@@ -216,9 +212,9 @@ struct cpu_info {
#define CPUF_IDENTIFIED 0x0020 /* CPU has been identified */
#define CPUF_CONST_TSC 0x0040 /* CPU has constant TSC */
-#define CPUF_USERSEGS_BIT 7 /* CPU has curproc's segments */
-#define CPUF_USERSEGS (1<<CPUF_USERSEGS_BIT) /* and FS.base */
+#define CPUF_USERSEGS 0x0080 /* CPU has curproc's segs and FS.base */
#define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */
+#define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */
#define CPUF_PRESENT 0x1000 /* CPU is present */
#define CPUF_RUNNING 0x2000 /* CPU is running */
@@ -268,7 +264,6 @@ extern void need_resched(struct cpu_info
extern struct cpu_info *cpu_info[MAXCPUS];
/*
- * amd64 only uses the extended save/restore format used
- * by fxsave/fsrestore, to always deal with the SSE registers,
- * which are part of the ABI to pass floating point values.
- * Must be stored in memory on a 16-byte boundary.
+ * If the CPU supports xsave/xrstor then we use them so that we can provide
+ * AVX support. Otherwise we require fxsave/fxrstor, as the SSE registers
+ * are part of the ABI for passing floating point values.
+ * While fxsave/fxrstor only required 16-byte alignment for the save area,
+ * xsave/xrstor requires the save area to have 64-byte alignment.
*/
/* md_flags */
-#define MDP_USEDFPU 0x0001 /* has used the FPU */
#define MDP_IRET 0x0002 /* return via iret, not sysret */
/* (iret can restore r11 and rcx) */