[PATCH 2/2] x86/msr: Allow unprivileged read access to some MSRs

From: Tim Wiederhake
Date: Tue May 23 2023 - 15:55:46 EST


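Deferring enforcement of the CAP_SYS_RAWIO check from open() to
the individual read, write and ioctl operations allows unprivileged
processes to read specific MSRs, such as IA32_CORE_CAPABILITIES and
IA32_ARCH_CAPABILITIES, while writes and reads of all other MSRs
remain restricted to CAP_SYS_RAWIO. This is helpful for e.g. qemu
and libvirt, which require the raw MSR content to calculate host
CPU capabilities. Other programs might be interested in IA32_EFER
for x86-64-v1 detection.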

Signed-off-by: Tim Wiederhake <twiederh@xxxxxxxxxx>
---
arch/x86/kernel/msr.c | 38 +++++++++++++++++++++++++++++++++-----
1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 058f2b67d0c7..9485aa7f8161 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -50,6 +50,23 @@ enum allow_write_msrs {

static enum allow_write_msrs allow_writes = MSR_WRITES_DEFAULT;

+static int filter_read(struct file *file, u32 reg) /* 0 if MSR @reg may be read through @file, else -EPERM */
+{
+ if (file->private_data) /* non-NULL when msr_open() saw capable(CAP_SYS_RAWIO) */
+ return 0;
+
+ switch (reg) { /* MSRs readable without CAP_SYS_RAWIO */
+ case MSR_IA32_CORE_CAPS:
+ case MSR_IA32_ARCH_CAPABILITIES:
+ case MSR_EFER:
+ return 0;
+ default:
+ break;
+ }
+
+ return -EPERM; /* opener lacked CAP_SYS_RAWIO and @reg is not in the list above */
+}
+
static ssize_t msr_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -59,6 +76,10 @@ static ssize_t msr_read(struct file *file, char __user *buf,
int cpu = iminor(file_inode(file));
int err = 0;

+ err = filter_read(file, reg);
+ if (err)
+ return err;
+
if (count < 8)
return -EINVAL; /* Invalid chunk size */

@@ -71,7 +92,7 @@ static ssize_t msr_read(struct file *file, char __user *buf,
return 8;
}

-static int filter_write(u32 reg)
+static int filter_write(struct file *file, u32 reg)
{
/*
* MSRs writes usually happen all at once, and can easily saturate kmsg.
@@ -83,6 +104,9 @@ static int filter_write(u32 reg)
*/
static DEFINE_RATELIMIT_STATE(fw_rs, 30 * HZ, 1);

+ if (!file->private_data)
+ return -EPERM;
+
switch (allow_writes) {
case MSR_WRITES_ON: return 0;
case MSR_WRITES_OFF: return -EPERM;
@@ -113,7 +137,7 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
if (err)
return err;

- err = filter_write(reg);
+ err = filter_write(file, reg);
if (err)
return err;

@@ -156,6 +180,9 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
err = -EFAULT;
break;
}
+ err = filter_read(file, regs[1]);
+ if (err)
+ return err;
err = rdmsr_safe_regs_on_cpu(cpu, regs);
if (err)
break;
@@ -176,7 +203,7 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
if (err)
break;

- err = filter_write(regs[1]);
+ err = filter_write(file, regs[1]);
if (err)
return err;

@@ -202,8 +229,7 @@ static int msr_open(struct inode *inode, struct file *file)
unsigned int cpu = iminor(file_inode(file));
struct cpuinfo_x86 *c;

- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
+ file->private_data = (void *)(capable(CAP_SYS_RAWIO));

if (cpu >= nr_cpu_ids || !cpu_online(cpu))
return -ENXIO; /* No such CPU */
@@ -245,6 +271,8 @@ static int msr_device_destroy(unsigned int cpu)

static char *msr_devnode(const struct device *dev, umode_t *mode)
{
+ if (mode) /* only written when a mode pointer is supplied */
+ *mode = 0644; /* owner read/write, group and others read-only */
return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
}

--
2.39.2