Re: [PATCH 05/14] KVM: x86: Emulator fixes for eip canonical checks on near branches

From: Andy Lutomirski
Date: Fri Oct 24 2014 - 13:53:56 EST


On 10/24/2014 08:07 AM, Paolo Bonzini wrote:
> From: Nadav Amit <namit@xxxxxxxxxxxxxxxxx>
>
> Before changing rip (during jmp, call, ret, etc.) the target should be asserted
> to be canonical one, as real CPUs do. During sysret, both target rsp and rip
> should be canonical. If any of these values is noncanonical, a #GP exception
> should occur. The exception to this rule are syscall and sysenter instructions
> in which the assigned rip is checked during the assignment to the relevant
> MSRs.

Careful here. AMD CPUs (IIUC) send #PF (or maybe #GP) from CPL3 instead
of #GP from CPL0 on sysret to a non-canonical address. That behavior is
*far* better than the Intel behavior, and it may be important.

If an OS relies on that behavior on AMD CPUs, and guest ring 3 can force
guest ring 0 to do an emulated sysret to a non-canonical address, then
the guest ring3 code can own the guest ring0 code.

--Andy

>
> This patch fixes the emulator to behave as real CPUs do for near branches.
> Far branches are handled by the next patch.
>
> This fixes CVE-2014-3647.
>
> Cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Nadav Amit <namit@xxxxxxxxxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> arch/x86/kvm/emulate.c | 78 ++++++++++++++++++++++++++++++++++----------------
> 1 file changed, 54 insertions(+), 24 deletions(-)
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 047698974799..a1b9139169f6 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -564,7 +564,8 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
> return emulate_exception(ctxt, NM_VECTOR, 0, false);
> }
>
> -static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
> +static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
> + int cs_l)
> {
> switch (ctxt->op_bytes) {
> case 2:
> @@ -574,16 +575,25 @@ static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
> ctxt->_eip = (u32)dst;
> break;
> case 8:
> + if ((cs_l && is_noncanonical_address(dst)) ||
> + (!cs_l && (dst & ~(u32)-1)))
> + return emulate_gp(ctxt, 0);
> ctxt->_eip = dst;
> break;
> default:
> WARN(1, "unsupported eip assignment size\n");
> }
> + return X86EMUL_CONTINUE;
> +}
> +
> +static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
> +{
> + return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
> }
>
> -static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
> +static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
> {
> - assign_eip_near(ctxt, ctxt->_eip + rel);
> + return assign_eip_near(ctxt, ctxt->_eip + rel);
> }
>
> static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
> @@ -1998,13 +2008,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
> case 2: /* call near abs */ {
> long int old_eip;
> old_eip = ctxt->_eip;
> - ctxt->_eip = ctxt->src.val;
> + rc = assign_eip_near(ctxt, ctxt->src.val);
> + if (rc != X86EMUL_CONTINUE)
> + break;
> ctxt->src.val = old_eip;
> rc = em_push(ctxt);
> break;
> }
> case 4: /* jmp abs */
> - ctxt->_eip = ctxt->src.val;
> + rc = assign_eip_near(ctxt, ctxt->src.val);
> break;
> case 5: /* jmp far */
> rc = em_jmp_far(ctxt);
> @@ -2039,10 +2051,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
>
> static int em_ret(struct x86_emulate_ctxt *ctxt)
> {
> - ctxt->dst.type = OP_REG;
> - ctxt->dst.addr.reg = &ctxt->_eip;
> - ctxt->dst.bytes = ctxt->op_bytes;
> - return em_pop(ctxt);
> + int rc;
> + unsigned long eip;
> +
> + rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
> + if (rc != X86EMUL_CONTINUE)
> + return rc;
> +
> + return assign_eip_near(ctxt, eip);
> }
>
> static int em_ret_far(struct x86_emulate_ctxt *ctxt)
> @@ -2323,7 +2339,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
> {
> const struct x86_emulate_ops *ops = ctxt->ops;
> struct desc_struct cs, ss;
> - u64 msr_data;
> + u64 msr_data, rcx, rdx;
> int usermode;
> u16 cs_sel = 0, ss_sel = 0;
>
> @@ -2339,6 +2355,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
> else
> usermode = X86EMUL_MODE_PROT32;
>
> + rcx = reg_read(ctxt, VCPU_REGS_RCX);
> + rdx = reg_read(ctxt, VCPU_REGS_RDX);
> +
> cs.dpl = 3;
> ss.dpl = 3;
> ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
> @@ -2356,6 +2375,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
> ss_sel = cs_sel + 8;
> cs.d = 0;
> cs.l = 1;
> + if (is_noncanonical_address(rcx) ||
> + is_noncanonical_address(rdx))
> + return emulate_gp(ctxt, 0);
> break;
> }
> cs_sel |= SELECTOR_RPL_MASK;
> @@ -2364,8 +2386,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
> ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
> ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
>
> - ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
> - *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
> + ctxt->_eip = rdx;
> + *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
>
> return X86EMUL_CONTINUE;
> }
> @@ -2905,10 +2927,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
>
> static int em_call(struct x86_emulate_ctxt *ctxt)
> {
> + int rc;
> long rel = ctxt->src.val;
>
> ctxt->src.val = (unsigned long)ctxt->_eip;
> - jmp_rel(ctxt, rel);
> + rc = jmp_rel(ctxt, rel);
> + if (rc != X86EMUL_CONTINUE)
> + return rc;
> return em_push(ctxt);
> }
>
> @@ -2940,11 +2965,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
> static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
> {
> int rc;
> + unsigned long eip;
>
> - ctxt->dst.type = OP_REG;
> - ctxt->dst.addr.reg = &ctxt->_eip;
> - ctxt->dst.bytes = ctxt->op_bytes;
> - rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
> + rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
> + if (rc != X86EMUL_CONTINUE)
> + return rc;
> + rc = assign_eip_near(ctxt, eip);
> if (rc != X86EMUL_CONTINUE)
> return rc;
> rsp_increment(ctxt, ctxt->src.val);
> @@ -3271,20 +3297,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
>
> static int em_loop(struct x86_emulate_ctxt *ctxt)
> {
> + int rc = X86EMUL_CONTINUE;
> +
> register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
> if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
> (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
> - jmp_rel(ctxt, ctxt->src.val);
> + rc = jmp_rel(ctxt, ctxt->src.val);
>
> - return X86EMUL_CONTINUE;
> + return rc;
> }
>
> static int em_jcxz(struct x86_emulate_ctxt *ctxt)
> {
> + int rc = X86EMUL_CONTINUE;
> +
> if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
> - jmp_rel(ctxt, ctxt->src.val);
> + rc = jmp_rel(ctxt, ctxt->src.val);
>
> - return X86EMUL_CONTINUE;
> + return rc;
> }
>
> static int em_in(struct x86_emulate_ctxt *ctxt)
> @@ -4743,7 +4773,7 @@ special_insn:
> break;
> case 0x70 ... 0x7f: /* jcc (short) */
> if (test_cc(ctxt->b, ctxt->eflags))
> - jmp_rel(ctxt, ctxt->src.val);
> + rc = jmp_rel(ctxt, ctxt->src.val);
> break;
> case 0x8d: /* lea r16/r32, m */
> ctxt->dst.val = ctxt->src.addr.mem.ea;
> @@ -4773,7 +4803,7 @@ special_insn:
> break;
> case 0xe9: /* jmp rel */
> case 0xeb: /* jmp rel short */
> - jmp_rel(ctxt, ctxt->src.val);
> + rc = jmp_rel(ctxt, ctxt->src.val);
> ctxt->dst.type = OP_NONE; /* Disable writeback. */
> break;
> case 0xf4: /* hlt */
> @@ -4898,7 +4928,7 @@ twobyte_insn:
> break;
> case 0x80 ... 0x8f: /* jnz rel, etc*/
> if (test_cc(ctxt->b, ctxt->eflags))
> - jmp_rel(ctxt, ctxt->src.val);
> + rc = jmp_rel(ctxt, ctxt->src.val);
> break;
> case 0x90 ... 0x9f: /* setcc r/m8 */
> ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/