Re: [RFC PATCH v2 for 4.15 08/14] Provide cpu_opv system call

From: Boqun Feng
Date: Mon Nov 06 2017 - 21:05:45 EST


On Mon, Nov 06, 2017 at 03:56:38PM -0500, Mathieu Desnoyers wrote:
[...]
> +static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
> + struct page ***pinned_pages_ptr, size_t *nr_pinned,
> + int write)
> +{
> + struct page *pages[2];
> + int ret, nr_pages;
> +
> + if (!len)
> + return 0;
> + nr_pages = cpu_op_range_nr_pages(addr, len);
> + BUG_ON(nr_pages > 2);
> + if (*nr_pinned + nr_pages > NR_PINNED_PAGES_ON_STACK) {

Is this a bug? Seems you will kzalloc() every time if *nr_pinned is
bigger than NR_PINNED_PAGES_ON_STACK, which will result in memory
leaking.

I think the logic here is complex enough for us to introduce a
structure, like:

struct cpu_opv_page_pinner {
int nr_pinned;
bool is_kmalloc;
struct page **pinned_pages;
};

Thoughts?

Regards,
Boqun

> + struct page **pinned_pages =
> + kzalloc(CPU_OP_VEC_LEN_MAX * CPU_OP_MAX_PAGES
> + * sizeof(struct page *), GFP_KERNEL);
> + if (!pinned_pages)
> + return -ENOMEM;
> + memcpy(pinned_pages, *pinned_pages_ptr,
> + *nr_pinned * sizeof(struct page *));
> + *pinned_pages_ptr = pinned_pages;
> + }
> +again:
> + ret = get_user_pages_fast(addr, nr_pages, write, pages);
> + if (ret < nr_pages) {
> + if (ret > 0)
> + put_page(pages[0]);
> + return -EFAULT;
> + }
> + /*
> + * Refuse device pages, the zero page, pages in the gate area,
> + * and special mappings.
> + */
> + ret = cpu_op_check_pages(pages, nr_pages);
> + if (ret == -EAGAIN) {
> + put_page(pages[0]);
> + if (nr_pages > 1)
> + put_page(pages[1]);
> + goto again;
> + }
> + if (ret)
> + goto error;
> + (*pinned_pages_ptr)[(*nr_pinned)++] = pages[0];
> + if (nr_pages > 1)
> + (*pinned_pages_ptr)[(*nr_pinned)++] = pages[1];
> + return 0;
> +
> +error:
> + put_page(pages[0]);
> + if (nr_pages > 1)
> + put_page(pages[1]);
> + return -EFAULT;
> +}
> +
> +static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
> + struct page ***pinned_pages_ptr, size_t *nr_pinned)
> +{
> + int ret, i;
> + bool expect_fault = false;
> +
> + /* Check access, pin pages. */
> + for (i = 0; i < cpuopcnt; i++) {
> + struct cpu_op *op = &cpuop[i];
> +
> + switch (op->op) {
> + case CPU_COMPARE_EQ_OP:
> + case CPU_COMPARE_NE_OP:
> + ret = -EFAULT;
> + expect_fault = op->u.compare_op.expect_fault_a;
> + if (!access_ok(VERIFY_READ, op->u.compare_op.a,
> + op->len))
> + goto error;
> + ret = cpu_op_pin_pages(
> + (unsigned long)op->u.compare_op.a,
> + op->len, pinned_pages_ptr, nr_pinned, 0);
> + if (ret)
> + goto error;
> + ret = -EFAULT;
> + expect_fault = op->u.compare_op.expect_fault_b;
> + if (!access_ok(VERIFY_READ, op->u.compare_op.b,
> + op->len))
> + goto error;
> + ret = cpu_op_pin_pages(
> + (unsigned long)op->u.compare_op.b,
> + op->len, pinned_pages_ptr, nr_pinned, 0);
> + if (ret)
> + goto error;
> + break;
> + case CPU_MEMCPY_OP:
> + ret = -EFAULT;
> + expect_fault = op->u.memcpy_op.expect_fault_dst;
> + if (!access_ok(VERIFY_WRITE, op->u.memcpy_op.dst,
> + op->len))
> + goto error;
> + ret = cpu_op_pin_pages(
> + (unsigned long)op->u.memcpy_op.dst,
> + op->len, pinned_pages_ptr, nr_pinned, 1);
> + if (ret)
> + goto error;
> + ret = -EFAULT;
> + expect_fault = op->u.memcpy_op.expect_fault_src;
> + if (!access_ok(VERIFY_READ, op->u.memcpy_op.src,
> + op->len))
> + goto error;
> + ret = cpu_op_pin_pages(
> + (unsigned long)op->u.memcpy_op.src,
> + op->len, pinned_pages_ptr, nr_pinned, 0);
> + if (ret)
> + goto error;
> + break;
> + case CPU_ADD_OP:
> + ret = -EFAULT;
> + expect_fault = op->u.arithmetic_op.expect_fault_p;
> + if (!access_ok(VERIFY_WRITE, op->u.arithmetic_op.p,
> + op->len))
> + goto error;
> + ret = cpu_op_pin_pages(
> + (unsigned long)op->u.arithmetic_op.p,
> + op->len, pinned_pages_ptr, nr_pinned, 1);
> + if (ret)
> + goto error;
> + break;
> + case CPU_OR_OP:
> + case CPU_AND_OP:
> + case CPU_XOR_OP:
> + ret = -EFAULT;
> + expect_fault = op->u.bitwise_op.expect_fault_p;
> + if (!access_ok(VERIFY_WRITE, op->u.bitwise_op.p,
> + op->len))
> + goto error;
> + ret = cpu_op_pin_pages(
> + (unsigned long)op->u.bitwise_op.p,
> + op->len, pinned_pages_ptr, nr_pinned, 1);
> + if (ret)
> + goto error;
> + break;
> + case CPU_LSHIFT_OP:
> + case CPU_RSHIFT_OP:
> + ret = -EFAULT;
> + expect_fault = op->u.shift_op.expect_fault_p;
> + if (!access_ok(VERIFY_WRITE, op->u.shift_op.p,
> + op->len))
> + goto error;
> + ret = cpu_op_pin_pages(
> + (unsigned long)op->u.shift_op.p,
> + op->len, pinned_pages_ptr, nr_pinned, 1);
> + if (ret)
> + goto error;
> + break;
> + case CPU_MB_OP:
> + break;
> + default:
> + return -EINVAL;
> + }
> + }
> + return 0;
> +
> +error:
> + for (i = 0; i < *nr_pinned; i++)
> + put_page((*pinned_pages_ptr)[i]);
> + *nr_pinned = 0;
> + /*
> + * If faulting access is expected, return EAGAIN to user-space.
> + * It allows user-space to distinguish between a fault caused by
> + * an access which is expect to fault (e.g. due to concurrent
> + * unmapping of underlying memory) from an unexpected fault from
> + * which a retry would not recover.
> + */
> + if (ret == -EFAULT && expect_fault)
> + return -EAGAIN;
> + return ret;
> +}
[...]

Attachment: signature.asc
Description: PGP signature