[RFC PATCH 6/6] crypto: sa2ul_pka: Add SA2UL PKA driver

From: Daniel Parks
Date: Mon Aug 08 2022 - 15:14:53 EST


This device exposes asymmetric crypto primitives rather than complete
operations; it supports only memory-mapped I/O, and its memory is
addressable only one 32-bit word at a time. As a result, some of the
code may look a little different from other crypto accelerator
drivers.
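
For illustration, the word-at-a-time access pattern looks roughly like
the sketch below (hypothetical helper, not code from this patch):

	/* copy an operand into PKA vector RAM one 32-bit word per
	 * bus access; a plain memcpy() to the region is not possible
	 */
	static void pka_copy_words(void __iomem *ram, const u32 *src,
				   size_t nwords)
	{
		size_t i;

		for (i = 0; i < nwords; i++)
			writel_relaxed(src[i], ram + 4 * i);
	}

The scatterlist helpers in sa2ul_pka_sg.c generalize this pattern to
unaligned, fragmented buffers and add on-the-fly endian swapping.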

Signed-off-by: Daniel Parks <danielrparks@xxxxxx>
---
drivers/crypto/Kconfig | 2 +
drivers/crypto/Makefile | 1 +
drivers/crypto/sa2ul_pka/Kconfig | 26 +
drivers/crypto/sa2ul_pka/Makefile | 3 +
drivers/crypto/sa2ul_pka/sa2ul_pka.h | 135 ++++++
drivers/crypto/sa2ul_pka/sa2ul_pka_base.c | 564 ++++++++++++++++++++++
drivers/crypto/sa2ul_pka/sa2ul_pka_dh.c | 150 ++++++
drivers/crypto/sa2ul_pka/sa2ul_pka_op.c | 205 ++++++++
drivers/crypto/sa2ul_pka/sa2ul_pka_op.h | 28 ++
drivers/crypto/sa2ul_pka/sa2ul_pka_rsa.c | 193 ++++++++
drivers/crypto/sa2ul_pka/sa2ul_pka_sg.c | 316 ++++++++++++
11 files changed, 1623 insertions(+)
create mode 100644 drivers/crypto/sa2ul_pka/Kconfig
create mode 100644 drivers/crypto/sa2ul_pka/Makefile
create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka.h
create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka_base.c
create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka_dh.c
create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka_op.c
create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka_op.h
create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka_rsa.c
create mode 100644 drivers/crypto/sa2ul_pka/sa2ul_pka_sg.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 3e6aa319920b..5238e3028ffd 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -816,7 +816,9 @@ config CRYPTO_DEV_SA2UL
K3 devices include a security accelerator engine that may be
used for crypto offload. Select this if you want to use hardware
acceleration for cryptographic algorithms on these devices.

+source "drivers/crypto/sa2ul_pka/Kconfig"
+
source "drivers/crypto/keembay/Kconfig"

endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index f81703a86b98..3c66515ded16 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -37,8 +37,9 @@ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
obj-$(CONFIG_CRYPTO_DEV_QCOM_RNG) += qcom-rng.o
obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/
obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
obj-$(CONFIG_CRYPTO_DEV_SA2UL) += sa2ul.o
+obj-$(CONFIG_CRYPTO_DEV_SA2UL_PKA) += sa2ul_pka/
obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/
obj-$(CONFIG_ARCH_STM32) += stm32/
obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
diff --git a/drivers/crypto/sa2ul_pka/Kconfig b/drivers/crypto/sa2ul_pka/Kconfig
new file mode 100644
index 000000000000..c4f87b14878e
--- /dev/null
+++ b/drivers/crypto/sa2ul_pka/Kconfig
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_DEV_SA2UL_PKA
+ tristate "Support for TI security accelerator public-key module"
+ depends on CRYPTO_DEV_SA2UL || COMPILE_TEST
+ select ARM64_CRYPTO
+ select CRYPTO_AKCIPHER
+ select CRYPTO_RSA
+ select PACKING
+ select CRYPTO_DH
+ help
+ The K3 security accelerator engine contains a public-key
+ cryptography module. Select this if you want to use hardware
+ acceleration for asymmetric cryptography on these devices. This
+ engine is not available to the Linux cores on most devices; check
+ your device tree if unsure.
+
+config CRYPTO_DEV_SA2UL_PKA_DEBUG
+ bool "Debugging options for TI security accelerator public-key module"
+ depends on CRYPTO_DEV_SA2UL_PKA
+ help
+ Enables the module options 'snapshot' and 'poison_mem' to assist
+ in debugging the PKA driver. Choosing "y" allows the root user to
+ extract cryptographic keys from the driver, so do not enable this
+ option in a production build.
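+
+ When 'snapshot' is set, the snapshot taken before the most recent
+ operation is exposed through the binary sysfs files 'regs' and
+ 'mem' on the platform device.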
diff --git a/drivers/crypto/sa2ul_pka/Makefile b/drivers/crypto/sa2ul_pka/Makefile
new file mode 100644
index 000000000000..b5af4597f186
--- /dev/null
+++ b/drivers/crypto/sa2ul_pka/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_CRYPTO_DEV_SA2UL_PKA) += sa2ul_pka.o
+sa2ul_pka-objs := sa2ul_pka_base.o sa2ul_pka_sg.o sa2ul_pka_op.o sa2ul_pka_rsa.o sa2ul_pka_dh.o
diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka.h b/drivers/crypto/sa2ul_pka/sa2ul_pka.h
new file mode 100644
index 000000000000..09e8929b2fa3
--- /dev/null
+++ b/drivers/crypto/sa2ul_pka/sa2ul_pka.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * K3 SA2UL Public Key Accelerator driver
+ *
+ * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com
+ *
+ * Author: Daniel Parks <danielrparks@xxxxxx>
+ */
+
+#ifndef _K3_SA2UL_PKA_
+#define _K3_SA2UL_PKA_
+
+#include <crypto/akcipher.h>
+#include <crypto/kpp.h>
+#include <linux/scatterlist.h>
+
+/* registers */
+#define PKA_APTR 0x0000
+#define PKA_BPTR 0x0004
+#define PKA_CPTR 0x0008
+#define PKA_DPTR 0x000c
+#define PKA_ALENGTH 0x0010
+#define PKA_BLENGTH 0x0014
+#define PKA_SHIFT 0x0018
+#define PKA_FUNCTION 0x001c
+#define PKA_COMPARE 0x0020
+#define PKA_MSW 0x0024
+#define PKA_DIVMSW 0x0028
+#define PKA_STATUS 0x00c0
+#define PKA_SEQ_CTRL 0x00c8
+#define PKA_OPTIONS 0x00f4
+#define PKA_SW_REV 0x00f8
+#define PKA_REVISION 0x00fc
+
+#define PKA_REV 0x1fe0
+#define PKA_CLK_CTRL 0x1fe8
+#define PKA_SYSCONFIG 0x1ff0
+#define PKA_SYSSTATUS 0x1ff4
+#define PKA_IRQSTATUS 0x1ff8
+#define PKA_IRQENABLE 0x1ffc
+
+#define PKA_REGS_SIZE (PKA_REVISION + 4 - PKA_APTR \
+ + PKA_IRQENABLE + 4 - PKA_REV)
+
+/* firmware info */
+#define PKA_FW_FMT "eip29t2_%u.%u.%u.bin"
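+/* yields names like "eip29t2_2.1.0.bin" for firmware version 2.1.0 */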
+#define PKA_FW_RETRIES 5
+/* memory map info */
+#define PKA_PROG_RAM_SIZE 0x4000
+#define PKA_RAM_SIZE 0x1000
+
+/* register bits */
+#define PKA_FUNCTION_RUN BIT(15)
+#define PKA_FUNCTION_OP_TOP_MASK 0x38
+#define PKA_FUNCTION_OP_BOTTOM_MASK 0x3
+
+#define PKA_SEQ_CTRL_RST BIT(31)
+#define PKA_SEQ_CTRL_START BIT(8)
+#define PKA_SEQ_CTRL_STATUS_MASK 0x0000ff00
+#define PKA_SEQ_CTRL_STATUS_OFFSET 8
+
+#define PKA_SYSCONFIG_SRST BIT(1)
+#define PKA_SYSCONFIG_IDLE BIT(5)
+#define PKA_SYSCONFIG_NOIDLE BIT(4)
+
+#define PKA_SYSSTATUS_RST_COMP BIT(0)
+
+#define PKA_RST_TIMEOUT_J 2
+
+#define PKA_SW_REV_MASK 0x0fff0fff
+
+#define PKA_IRQ_MAIN BIT(0)
+
+#define PKA_MAX_VEC 520
+
+/* device configuration */
+struct pka_config {
+ bool ecc : 1;
+ bool montgomery : 1;
+ bool prog_ram : 1;
+};
+
+struct pka_version {
+ u8 major;
+ u8 minor;
+ u8 patch;
+};
+
+struct pka_snapshot {
+ u32 *regs;
+ u32 *mem;
+};
+
+struct pka_data {
+ void __iomem *base;
+ void __iomem *mem;
+ struct device *dev;
+ struct pka_config config;
+ struct mutex mutex; /* locked while the PKA is executing an operation */
+ bool running;
+ struct completion done;
+ int fw_tries;
+ char fw_name[64];
+#ifdef CONFIG_CRYPTO_DEV_SA2UL_PKA_DEBUG
+ struct pka_snapshot snapshot;
+#endif
+};
+
+struct pka_alg_template {
+ struct pka_data *data;
+ union {
+ struct akcipher_alg akcipher;
+ struct kpp_alg kpp;
+ } alg;
+};
+
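+/* recover the pka_data of the device that registered a tfm's algorithm */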
+#define PKA_TFM_DATA(tfm, type) \
+ container_of((tfm)->base.__crt_alg, \
+ struct pka_alg_template, \
+ alg.type.base)->data
+
+int pka_op_wait(struct pka_data *data);
+void pka_intr_enable(struct pka_data *dev_data);
+void pka_intr_disable(struct pka_data *dev_data);
+
+#ifdef CONFIG_CRYPTO_DEV_SA2UL_PKA_DEBUG
+void pka_take_snapshot(struct pka_data *data);
+#else
+static inline void pka_take_snapshot(struct pka_data *data) {}
+#endif
+
+/* ciphers */
+extern struct pka_alg_template pka_rsa, pka_dh;
+
+#endif /* _K3_SA2UL_PKA_ */
diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka_base.c b/drivers/crypto/sa2ul_pka/sa2ul_pka_base.c
new file mode 100644
index 000000000000..46a0e6b6ea37
--- /dev/null
+++ b/drivers/crypto/sa2ul_pka/sa2ul_pka_base.c
@@ -0,0 +1,564 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * K3 SA2UL Public Key Accelerator driver
+ *
+ * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com
+ *
+ * Author: Daniel Parks <danielrparks@xxxxxx>
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
+#include <linux/err.h>
+#include <linux/packing.h>
+#include <linux/pm_runtime.h>
+#include <linux/firmware.h>
+#include <linux/interrupt.h>
+#include <linux/sysfs.h>
+#include <linux/fips.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/delay.h>
+#include <crypto/internal/akcipher.h>
+#include <crypto/internal/kpp.h>
+#include <asm/unaligned.h>
+
+#include "sa2ul_pka.h"
+
+#ifdef CONFIG_CRYPTO_DEV_SA2UL_PKA_DEBUG
+static bool poison_mem;
+static bool snapshot;
+module_param(poison_mem, bool, 0);
+module_param(snapshot, bool, 0);
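+MODULE_PARM_DESC(poison_mem, "Fill PKA vector RAM with a 0xcc pattern at probe");
+MODULE_PARM_DESC(snapshot, "Capture register/memory snapshots before each operation");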
+#endif
+
+static const struct pka_version pka_fws[] = {
+ {2, 1, 0},
+ {}
+};
+
+static inline int pka_packing(void *pbuf, u64 *uval, int startbit, int endbit,
+ size_t pbuflen, enum packing_op op)
+{
+ return packing(pbuf, uval, startbit, endbit, pbuflen, op,
+ QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN);
+}
+
+static void pka_parse_version(struct pka_version *out, u32 packed_version,
+ int startbit)
+{
+ u64 res;
+
+ WARN_ON(startbit < 11);
+ pka_packing(&packed_version, &res,
+ startbit, startbit - 3, 4, UNPACK);
+ out->major = res;
+ pka_packing(&packed_version, &res,
+ startbit - 4, startbit - 7, 4, UNPACK);
+ out->minor = res;
+ pka_packing(&packed_version, &res,
+ startbit - 8, startbit - 11, 4, UNPACK);
+ out->patch = res;
+}
+
+static void pka_set_idle(void __iomem *base, bool idle)
+{
+ writel_relaxed(idle ? PKA_SYSCONFIG_IDLE : PKA_SYSCONFIG_NOIDLE,
+ base + PKA_SYSCONFIG);
+}
+
+static int pka_request_fw(struct pka_data *dev_data);
+
+/* enable/disable interrupts device-side */
+void pka_intr_enable(struct pka_data *dev_data)
+{
+ writel_relaxed(PKA_IRQ_MAIN, dev_data->base + PKA_IRQSTATUS);
+ writel_relaxed(PKA_IRQ_MAIN, dev_data->base + PKA_IRQENABLE);
+}
+
+void pka_intr_disable(struct pka_data *dev_data)
+{
+ writel_relaxed(0, dev_data->base + PKA_IRQENABLE);
+}
+
+#ifdef CONFIG_CRYPTO_DEV_SA2UL_PKA_DEBUG
+/* debugging - allows taking snapshots of memory and register file just before
+ * dispatching an operation
+ */
+void pka_take_snapshot(struct pka_data *data)
+{
+ if (snapshot) {
+ u32 i, j = 0;
+
+ for (i = PKA_APTR; i <= PKA_REVISION; i += 4, j++)
+ data->snapshot.regs[j] = readl_relaxed(data->base + i);
+ for (i = PKA_REV; i <= PKA_IRQENABLE; i += 4, j++)
+ data->snapshot.regs[j] = readl_relaxed(data->base + i);
+ for (i = 0, j = 0; i < PKA_RAM_SIZE; i += 4, j++)
+ data->snapshot.mem[j] = readl_relaxed(data->mem + i);
+ }
+}
+
+static ssize_t read_snap_reg(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t offset, size_t count)
+{
+ struct pka_data *data = attr->private;
+
+ memcpy(buf, (u8 *)data->snapshot.regs + offset, count);
+ return count;
+}
+
+static ssize_t read_snap_mem(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t offset, size_t count)
+{
+ struct pka_data *data = attr->private;
+
+ memcpy(buf, (u8 *)data->snapshot.mem + offset, count);
+ return count;
+}
+
+static BIN_ATTR(regs, 0600, read_snap_reg, NULL, PKA_REGS_SIZE);
+static BIN_ATTR(mem, 0600, read_snap_mem, NULL, PKA_RAM_SIZE);
+
+static inline int setup_debug(struct pka_data *dev_data)
+{
+ int ret;
+ struct device *dev = dev_data->dev;
+ size_t i;
+
+ if (snapshot) {
+ dev_data->snapshot.regs = devm_kzalloc(dev, PKA_REGS_SIZE,
+ GFP_KERNEL);
+ dev_data->snapshot.mem = devm_kzalloc(dev, PKA_RAM_SIZE,
+ GFP_KERNEL);
+ if (!dev_data->snapshot.mem || !dev_data->snapshot.regs)
+ return -ENOMEM;
+ sysfs_bin_attr_init(&bin_attr_regs);
+ sysfs_bin_attr_init(&bin_attr_mem);
+ bin_attr_regs.private = dev_data;
+ bin_attr_mem.private = dev_data;
+ ret = sysfs_create_bin_file(&dev->kobj, &bin_attr_regs);
+ if (ret) {
+ dev_err(dev, "failed to create regs snapshot file\n");
+ return ret;
+ }
+ ret = sysfs_create_bin_file(&dev->kobj, &bin_attr_mem);
+ if (ret) {
+ dev_err(dev, "failed to create mem snapshot file\n");
+ return ret;
+ }
+ }
+ if (poison_mem) {
+ for (i = 0; i < PKA_RAM_SIZE; i += 4)
+ writel_relaxed(0xccccccccU, dev_data->mem + i);
+ }
+ return 0;
+}
+
+static inline void cleanup_debug(struct platform_device *pdev)
+{
+ if (snapshot) {
+ sysfs_remove_bin_file(&pdev->dev.kobj, &bin_attr_regs);
+ sysfs_remove_bin_file(&pdev->dev.kobj, &bin_attr_mem);
+ }
+}
+
+#else
+static inline int setup_debug(struct pka_data *dev_data)
+{
+ return 0;
+}
+
+static inline void cleanup_debug(struct platform_device *pdev) {}
+#endif
+
+/* actually load the firmware */
+static inline int copy_fw(const struct firmware *fw, struct pka_data *dev_data)
+{
+ size_t i;
+ void __iomem *base = dev_data->base;
+ void __iomem *ram = dev_data->mem;
+ u32 reg;
+ int ret;
+
+ writel_relaxed(PKA_SEQ_CTRL_RST, base + PKA_SEQ_CTRL);
+ for (i = 0; i < fw->size; i += 4)
+ writel_relaxed(get_unaligned_le32(fw->data + i), ram + i);
+ writel_relaxed(0, base + PKA_SEQ_CTRL);
+ ret = readl_relaxed_poll_timeout(base + PKA_SEQ_CTRL, reg, (reg &
+ PKA_SEQ_CTRL_START) == PKA_SEQ_CTRL_START, 0,
+ PKA_RST_TIMEOUT_J);
+ if (ret) {
+ dev_warn(dev_data->dev,
+ "timed out waiting for sequencer to start 0x%08x\n",
+ reg);
+ dev_warn(dev_data->dev, "function: 0x%08x\n",
+ readl_relaxed(base + PKA_FUNCTION));
+ return -ETIMEDOUT;
+ }
+ return 0;
+}
+
+static inline int verify_fw(const struct firmware *fw,
+ struct pka_data *dev_data)
+{
+ size_t i;
+ void __iomem *base = dev_data->base;
+ void __iomem *ram = dev_data->mem;
+ u32 reg;
+ int ret;
+
+ writel_relaxed(PKA_SEQ_CTRL_RST, base + PKA_SEQ_CTRL);
+ for (i = 0; i < fw->size; i += 4) {
+ if (readl_relaxed(ram + i) != get_unaligned_le32(fw->data + i)) {
+ dev_warn(dev_data->dev,
+ "firmware download corrupted at 0x%zx\n", i);
+ return -EINVAL;
+ }
+ }
+ writel_relaxed(0, base + PKA_SEQ_CTRL);
+ ret = readl_relaxed_poll_timeout(base + PKA_SEQ_CTRL, reg, (reg &
+ PKA_SEQ_CTRL_START) == PKA_SEQ_CTRL_START, 0,
+ PKA_RST_TIMEOUT_J);
+ return ret;
+}
+
+static inline void print_fw_rev(struct pka_data *dev_data)
+{
+ struct pka_version internal, custom;
+ u32 reg;
+
+ reg = readl_relaxed(dev_data->base + PKA_SW_REV) & PKA_SW_REV_MASK;
+ pka_parse_version(&internal, reg, 27);
+ pka_parse_version(&custom, reg, 11);
+ dev_info(dev_data->dev, "loaded firmware internal version %u.%u.%u, custom version %u.%u.%u\n",
+ internal.major, internal.minor, internal.patch,
+ custom.major, custom.minor, custom.patch);
+}
+
+/* load a firmware (requested from pka_request_next_fw) onto the device */
+static int pka_load_fw(const struct firmware *fw, struct pka_data *dev_data)
+{
+ void __iomem *base = dev_data->base;
+ u32 reg;
+ int ret = -ENOENT;
+
+ if (!fw) {
+ goto fw_bad;
+ } else if (fw->size > PKA_PROG_RAM_SIZE) {
+ dev_warn(dev_data->dev, "firmware is larger than max firmware size 0x%x, skipping\n",
+ PKA_PROG_RAM_SIZE);
+ goto fw_bad;
+ }
+ ret = -EAGAIN;
+ if (copy_fw(fw, dev_data))
+ goto rst_fail;
+ if (verify_fw(fw, dev_data))
+ goto rst_fail;
+ print_fw_rev(dev_data);
+ release_firmware(fw);
+ reg = readl_relaxed(base + PKA_FUNCTION);
+ if (reg & PKA_FUNCTION_RUN)
+ dev_warn(dev_data->dev,
+ "run bit set after loading firmware! 0x%08x\n", reg);
+
+ pka_intr_enable(dev_data);
+ return 0;
+
+rst_fail:
+ dev_warn(dev_data->dev,
+ "firmware download corrupt (check silicon?), retrying\n");
+ dev_data->fw_tries++;
+fw_bad:
+ release_firmware(fw);
+ return ret;
+}
+
+/* Do some basic checks to make sure this device is similar enough to the
+ * development device to work with this driver. Also determine some information
+ * about the device configuration so we know which algorithms to enable.
+ */
+static int pka_parse_options(struct pka_data *data, u32 options)
+{
+ int ret = 0;
+ u64 tmp;
+
+ dev_info(data->dev, "options 0x%x\n", options);
+ // PKCP configuration
+ pka_packing(&options, &tmp, 1, 0, 4, UNPACK);
+ if (tmp != 2) {
+ ret = -EACCES;
+ goto out;
+ }
+
+ pka_packing(&options, &tmp, 4, 2, 4, UNPACK);
+ if (tmp <= 2) {
+ data->config.montgomery = !!tmp;
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+ // sequencer configuration
+ pka_packing(&options, &tmp, 6, 5, 4, UNPACK);
+ if (tmp != 1) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ pka_packing(&options, &tmp, 7, 7, 4, UNPACK);
+ data->config.prog_ram = tmp;
+ // GF(2^m) configuration
+ pka_packing(&options, &tmp, 23, 22, 4, UNPACK);
+ if (tmp <= 1) {
+ data->config.ecc = tmp;
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static void pka_print_hw_rev(struct pka_data *dev_data)
+{
+ struct pka_version ver;
+ u32 reg;
+
+ reg = readl_relaxed(dev_data->base + PKA_REVISION);
+ pka_parse_version(&ver, reg, 27);
+ dev_info(dev_data->dev, "HW rev %u.%u.%u\n",
+ ver.major, ver.minor, ver.patch);
+}
+
+/* try to load any available firmware into the device */
+static int pka_request_fw(struct pka_data *dev_data)
+{
+ int fw_next_idx = 0;
+ struct pka_version next_ver;
+ const struct firmware *fw;
+ int ret;
+
+ next_ver = pka_fws[fw_next_idx++];
+ while ((next_ver.major || next_ver.minor || next_ver.patch) &&
+ dev_data->fw_tries < PKA_FW_RETRIES) {
+ snprintf(dev_data->fw_name, sizeof(dev_data->fw_name), PKA_FW_FMT,
+ next_ver.major, next_ver.minor, next_ver.patch);
+
+ if (!request_firmware_direct(&fw, dev_data->fw_name,
+ dev_data->dev)) {
+ ret = pka_load_fw(fw, dev_data);
+ if (ret == 0)
+ return ret;
+ else if (ret == -EAGAIN)
+ fw_next_idx--;
+ }
+ next_ver = pka_fws[fw_next_idx++];
+ }
+
+ dev_warn(dev_data->dev, "unable to load firmware\n");
+ return -ENOENT;
+}
+
+/* interrupts support */
+static irqreturn_t pka_intr(int irqnum, void *_data)
+{
+ struct pka_data *data = _data;
+ u32 reg;
+
+ reg = readl_relaxed(data->base + PKA_IRQSTATUS);
+ if (reg & PKA_IRQ_MAIN) {
+ writel_relaxed(PKA_IRQ_MAIN, data->base + PKA_IRQSTATUS);
+ return IRQ_WAKE_THREAD;
+ } else {
+ return IRQ_NONE;
+ }
+}
+
+/* data->running is not synchronized; this is just a best-effort check to aid
+ * in debugging. It is not intended to reliably filter bad interrupts from a
+ * malfunctioning device.
+ */
+static irqreturn_t pka_intr_threaded(int irqnum, void *_data)
+{
+ struct pka_data *data = _data;
+
+ if (unlikely(!data->running)) {
+ pr_warn("%s: spurious interrupt!\n", THIS_MODULE->name);
+ } else {
+ data->running = false;
+ complete(&data->done);
+ }
+ return IRQ_HANDLED;
+}
+
+/**
+ * pka_op_wait - wait for operation to finish
+ * @data: the pka to wait for
+ *
+ * Description:
+ * Block waiting for the sequencer interrupt which indicates that the
+ * operation has finished.
+ *
+ * Return:
+ * * 0 - Success
+ * * -EINVAL - PKA returned unknown error code
+ * * -ETIMEDOUT - PKA died
+ */
+int pka_op_wait(struct pka_data *data)
+{
+ int ret;
+ u32 reg;
+
+ ret = wait_for_completion_timeout(&data->done, HZ * 2);
+ reg = readl_relaxed(data->base + PKA_SEQ_CTRL);
+ if (!ret) {
+ /* if we get here, the pka is in a non-recoverable state and
+ * future operations will have undefined behavior, so taint the
+ * kernel
+ */
+ WARN(true, "%s: pka stalled! function 0x%08x, seq 0x%08x\n",
+ THIS_MODULE->name,
+ readl_relaxed(data->base + PKA_FUNCTION), reg);
+ return -ETIMEDOUT;
+ }
+ reg = (reg & PKA_SEQ_CTRL_STATUS_MASK) >> PKA_SEQ_CTRL_STATUS_OFFSET;
+ if (reg == 1)
+ return 0;
+ dev_warn(data->dev, "pka op err 0x%02x\n", reg);
+ return -EINVAL;
+}
+
+static int pka_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ void __iomem *pka_base;
+ void __iomem *pka_mem;
+ struct pka_data *dev_data;
+ int ret;
+
+ if (fips_enabled) {
+ dev_err(dev, "FIPS mode is not supported\n");
+ return -EACCES;
+ }
+
+ dev_data = devm_kzalloc(dev, sizeof(*dev_data), GFP_KERNEL);
+ if (!dev_data)
+ return -ENOMEM;
+ pka_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(pka_base))
+ return PTR_ERR(pka_base);
+ pka_mem = devm_platform_ioremap_resource(pdev, 1);
+ if (IS_ERR(pka_mem))
+ return PTR_ERR(pka_mem);
+ dev_data->dev = dev;
+ dev_data->base = pka_base;
+ dev_data->mem = pka_mem;
+ dev_data->running = false;
+ init_completion(&dev_data->done);
+ mutex_init(&dev_data->mutex);
+ platform_set_drvdata(pdev, dev_data);
+
+ pka_print_hw_rev(dev_data);
+ // disable smart idle mode because it isn't smart
+ pka_set_idle(pka_base, false);
+
+ pm_runtime_enable(dev);
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
+ goto pm_disable_noput;
+
+ ret = pka_parse_options(dev_data,
+ readl_relaxed(pka_base + PKA_OPTIONS));
+ if (ret) {
+ dev_err(dev, "failed to parse pka opts: %d\n", ret);
+ goto pm_disable;
+ }
+ if (dev_data->config.prog_ram) {
+ dev_data->fw_tries = 0;
+ ret = pka_request_fw(dev_data);
+ if (ret)
+ goto pm_disable;
+ }
+ ret = setup_debug(dev_data);
+ if (ret)
+ goto pm_disable;
+ ret = platform_get_irq(pdev, 0);
+ if (ret < 0)
+ goto pm_disable;
+ ret = devm_request_threaded_irq(dev, ret, pka_intr, pka_intr_threaded,
+ IRQF_TRIGGER_NONE, pdev->name,
+ dev_data);
+ if (ret)
+ goto pm_disable;
+
+ if (dev_data->config.montgomery) {
+ pka_rsa.data = dev_data;
+ ret = crypto_register_akcipher(&pka_rsa.alg.akcipher);
+ if (ret) {
+ dev_err(dev,
+ "failed to register crypto op pka_rsa: %d\n",
+ ret);
+ goto pm_disable;
+ }
+ pka_dh.data = dev_data;
+ ret = crypto_register_kpp(&pka_dh.alg.kpp);
+ if (ret) {
+ dev_err(dev,
+ "failed to register crypto op pka_dh: %d\n",
+ ret);
+ crypto_unregister_akcipher(&pka_rsa.alg.akcipher);
+ goto pm_disable;
+ }
+ }
+
+ return 0;
+pm_disable:
+ pm_runtime_put_sync(dev);
+pm_disable_noput:
+ pm_runtime_disable(dev);
+ pka_set_idle(pka_base, true);
+ return ret;
+}
+
+static int pka_remove(struct platform_device *pdev)
+{
+ struct pka_data *dev_data = platform_get_drvdata(pdev);
+
+ pka_intr_disable(dev_data);
+
+ if (dev_data->config.montgomery) {
+ crypto_unregister_akcipher(&pka_rsa.alg.akcipher);
+ crypto_unregister_kpp(&pka_dh.alg.kpp);
+ }
+
+ pm_runtime_put_sync(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ pka_set_idle(dev_data->base, true);
+
+ cleanup_debug(pdev);
+
+ return 0;
+}
+
+static const struct of_device_id of_match[] = {
+ { .compatible = "inside-secure,safexcel-eip29t2" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, of_match);
+
+static struct platform_driver pka_driver = {
+ .probe = pka_probe,
+ .remove = pka_remove,
+ .driver = {
+ .name = "saul-pka-crypto",
+ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
+ .of_match_table = of_match,
+ },
+};
+
+module_platform_driver(pka_driver);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Parks <danielrparks@xxxxxx>");
+MODULE_DESCRIPTION("SA2UL Public Key Accelerator Driver");
diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka_dh.c b/drivers/crypto/sa2ul_pka/sa2ul_pka_dh.c
new file mode 100644
index 000000000000..9edd0383b9b1
--- /dev/null
+++ b/drivers/crypto/sa2ul_pka/sa2ul_pka_dh.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * K3 SA2UL Public Key Accelerator driver
+ *
+ * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com
+ *
+ * Author: Daniel Parks <danielrparks@xxxxxx>
+ */
+
+#include <linux/module.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/kpp.h>
+#include <crypto/dh.h>
+
+#include "sa2ul_pka.h"
+#include "sa2ul_pka_op.h"
+
+/* A copy helper is needed per algorithm type because each algorithm
+ * uses slightly different types in its key structure.
+ */
+static int pka_dh_copy_key_component(const void **dst, const void *src,
+ uint *newsz, uint sz, bool little)
+{
+ if (!src)
+ return 0;
+ // strip leading zeroes; check sz first so an empty buffer is not read
+ while (sz > 0 && !*(u8 *)src) {
+ src++;
+ sz--;
+ }
+
+ *newsz = ALIGN(sz, 4);
+
+ if (*newsz > PKA_MAX_VEC) {
+ pr_err("key is longer than maximum supported by PKA\n");
+ return -EINVAL;
+ }
+
+ kfree(*dst);
+ *dst = kmalloc(*newsz, GFP_KERNEL);
+ if (!*dst)
+ return -ENOMEM;
+
+ if (little) {
+ reverse_memcpy((u8 *)*dst, src, sz);
+ memset((u8 *)*dst + sz, 0, *newsz - sz);
+ } else {
+ memset((u8 *)*dst, 0, *newsz - sz);
+ memcpy((u8 *)*dst + *newsz - sz, src, sz);
+ }
+
+ return 0;
+}
+
+static int pka_dh_copy_params(struct dh *dst, struct dh *src)
+{
+ int ret;
+
+ ret = pka_dh_copy_key_component(&dst->key, src->key, &dst->key_size,
+ src->key_size, true);
+ if (ret)
+ goto die;
+ ret = pka_dh_copy_key_component(&dst->p, src->p, &dst->p_size,
+ src->p_size, true);
+ if (ret)
+ goto die;
+ ret = pka_dh_copy_key_component(&dst->g, src->g, &dst->g_size,
+ src->g_size, false);
+ if (ret)
+ goto die;
+ return 0;
+die:
+ kfree(dst->key);
+ kfree(dst->p);
+ kfree(dst->g);
+ /* clear stale pointers so the tfm exit handler cannot double-free */
+ memset(dst, 0, sizeof(*dst));
+ return ret;
+}
+
+static int pka_dh_set_secret(struct crypto_kpp *tfm, const void *buffer,
+ uint len)
+{
+ struct dh *ctx = kpp_tfm_ctx(tfm);
+ struct dh new_params;
+ int ret;
+
+ ret = crypto_dh_decode_key(buffer, len, &new_params);
+ if (ret < 0)
+ return ret;
+ return pka_dh_copy_params(ctx, &new_params);
+}
+
+/* uses the same code-sharing trick as dh_compute_value() from crypto/dh.c */
+static int pka_dh_modexp(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ struct pka_data *data = PKA_TFM_DATA(tfm, kpp);
+ struct dh *ctx = kpp_tfm_ctx(tfm);
+ struct scatterlist sg;
+ struct scatterlist *base;
+ uint baselen;
+ bool check_base = false;
+
+ if (req->src) {
+ base = req->src;
+ baselen = req->src_len;
+ } else {
+ sg_init_one(&sg, ctx->g, ctx->g_size);
+ base = &sg;
+ baselen = ctx->g_size;
+ check_base = true;
+ }
+ return pka_modexp(data, base, baselen, ctx->key, ctx->key_size, ctx->p,
+ ctx->p_size, req->dst, &req->dst_len, 1, check_base);
+}
+
+static uint pka_dh_max_size(struct crypto_kpp *tfm)
+{
+ struct dh *ctx = kpp_tfm_ctx(tfm);
+
+ return ctx->p_size;
+}
+
+static void pka_dh_exit(struct crypto_kpp *tfm)
+{
+ struct dh *ctx = kpp_tfm_ctx(tfm);
+
+ kfree(ctx->key);
+ kfree(ctx->p);
+ kfree(ctx->g);
+}
+
+struct pka_alg_template pka_dh = {
+ .alg.kpp = {
+ .set_secret = pka_dh_set_secret,
+ .generate_public_key = pka_dh_modexp,
+ .compute_shared_secret = pka_dh_modexp,
+ .max_size = pka_dh_max_size,
+ .exit = pka_dh_exit,
+ .base = {
+ .cra_name = "dh",
+ .cra_driver_name = "dh-sa2ul-pka",
+ .cra_priority = 400,
+ .cra_module = THIS_MODULE,
+ .cra_ctxsize = sizeof(struct dh)
+ }
+ }
+};
diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka_op.c b/drivers/crypto/sa2ul_pka/sa2ul_pka_op.c
new file mode 100644
index 000000000000..0b7eb6342b0d
--- /dev/null
+++ b/drivers/crypto/sa2ul_pka/sa2ul_pka_op.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * K3 SA2UL Public Key Accelerator driver
+ *
+ * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com
+ *
+ * Author: Daniel Parks <danielrparks@xxxxxx>
+ */
+
+#include <linux/iopoll.h>
+
+#include "sa2ul_pka.h"
+#include "sa2ul_pka_op.h"
+
+/* opcodes */
+#define PKA_SEQ_OP_START_BIT PKA_FUNCTION_RUN
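+/* a sequencer opcode is split across FUNCTION register bits 21:19 and 14:12 */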
+#define PKA_SEQ_OP(seqopcode) \
+ (PKA_SEQ_OP_START_BIT | (((seqopcode) & 0x38) << 16) \
+ | (((seqopcode) & 0x7) << 12))
+#define PKA_SEQ_OP_MODEXP PKA_SEQ_OP(0x06)
+#define PKA_OP_COMPARE (BIT(10) | PKA_FUNCTION_RUN)
+
+#define PKA_CMP_A_LT_B BIT(1)
+
+/*
+ * The following macros (PKA_ARG_*, PKA_*_OP) can be used to describe a dynamic
+ * memory layout for a PKA operation. For example, the modexp operation
+ * requires the following layout:
+ * +--------+--------------------------+
+ * | Vector | Contents |
+ * +--------+--------------------------+
+ * | A | *Exponent |
+ * | B | *Modulus <pad> |
+ * | C | *Base <pad> |
+ * | D | *Result <pad> *workspace |
+ * +--------+--------------------------+
+ * *64-bit aligned
+ *
+ * This layout can be created using the macros like this:
+ * PKA_SETUP_OP(data->base, data->mem, elen, mlen,
+ * PKA_ARG_START(PKA_APTR); PKA_ARG_IN_WORDS(exponent, elen);
+ * PKA_ARG_START(PKA_BPTR); PKA_ARG_IN_WORDS(modulus, mlen);
+ * PKA_ARG_PAD_WORDS(1);
+ * PKA_ARG_START(PKA_CPTR); PKA_ARG_IN_SCATTER_BE(base, mlen);
+ * PKA_ARG_PAD_WORDS(1);
+ * PKA_ARG_START(PKA_DPTR); PKA_ARG_GETLOC(outptr);
+ * PKA_ARG_PAD_WORDS(mlen);
+ * PKA_ARG_PAD_WORDS(1);
+ * PKA_ARG_WKSPC(...);
+ * );
+ *
+ * The PKA_ARG_* macros must only be used as arguments to the
+ * PKA_SETUP_OP macro.
+ */
+#define PKA_ARG_LOC(loc) (_i = (loc))
+#define PKA_ARG_GETLOC(out) ((out) = _i)
+#define PKA_ARG_ALIGN64() (_i = round_up((_i), 8))
+#define PKA_ARG_IN_WORDS(buf, len) do {\
+ PKA_ARG_ALIGN64(); \
+ for (_j = 0; _j < (len); _j++) { \
+ writel_relaxed(le32_to_cpu((buf)[_j]), _mem + _i); \
+ _i += 4; \
+ } \
+} while (0)
+#define _PKA_ARG_IN_SCATTER(sgl, len, be) do { \
+ PKA_ARG_ALIGN64(); \
+ _j = pka_sg_copy_in(_mem + _i, (sgl), (len), be); \
+ _i += _j; \
+} while (0)
+#define PKA_ARG_IN_SCATTER(sgl, len) _PKA_ARG_IN_SCATTER(sgl, len, false)
+#define PKA_ARG_IN_SCATTER_BE(sgl, len) _PKA_ARG_IN_SCATTER(sgl, len, true)
+#define PKA_ARG_PAD_WORDS(len) do { \
+ for (_j = 0; _j < (len); _j++) { \
+ writel_relaxed(0, _mem + _i); \
+ _i += 4; \
+ } \
+} while (0)
+#define PKA_ARG_WKSPC(len) do { \
+ PKA_ARG_ALIGN64(); \
+ _i += 4 * (len); \
+} while (0)
+#define PKA_ARG_START(reg) do { \
+ PKA_ARG_ALIGN64(); \
+ _reg = (reg); \
+ writel_relaxed(_i / 4, _base + _reg); \
+} while (0)
+
+#define PKA_SETUP_OP(b, m, alen, blen, vecs) do {\
+ u32 _reg, _j, _i = 0; \
+ void __iomem *_base = b; \
+ void __iomem *_mem = m; \
+ writel_relaxed(alen, _base + PKA_ALENGTH); \
+ writel_relaxed(blen, _base + PKA_BLENGTH); \
+ vecs \
+} while (0)
+#define PKA_SUBMIT_OP(_pka_data, opcode) do { \
+ struct pka_data *pka_data = _pka_data; \
+ (pka_data)->running = true; \
+ pka_take_snapshot(pka_data); \
+ writel_relaxed(opcode, (pka_data)->base + PKA_FUNCTION); \
+} while (0)
+
+void *reverse_memcpy(void *dest, const void *src, size_t count)
+{
+ char *tmp = dest + count - 1;
+ const char *s = src;
+
+ while (count--)
+ *tmp-- = *s++;
+ return dest;
+}
+
+/**
+ * pka_modexp - perform a modular exponentiation
+ * @data: PKA device to run the operation on
+ * @src: base, as a scatterlist
+ * @slen: length in bytes of @src
+ * @exp: exponent
+ * @elen: length in bytes of @exp
+ * @mod: modulus
+ * @mlen: length in bytes of @mod
+ * @dst: result pointer, as a scatterlist
+ * @dlen: length available in @dst
+ * @shift: number of odd primes to precompute (always 1 for public RSA, max 5
+ * for 4096-bit private RSA)
+ * @check_base: true if (base < modulus) should be checked first
+ *
+ * Description:
+ * Perform a modular exponentiation using the PKA.
+ * Blocks waiting for operation to finish.
+ * If @check_base is set, the function takes advantage of the fact that
+ * it can reuse memory after a comparison operation to perform the check
+ * (base < modulus) with almost no overhead (~10us).
+ *
+ * Return:
+ * * 0 - Success
+ * * -EINVAL - Problem with sizes of input buffers
+ * * -EINVAL - PKA returned error
+ * * -ETIMEDOUT - PKA died
+ */
+int pka_modexp(struct pka_data *data, struct scatterlist *src, uint slen,
+ const u8 *exp, size_t elen, const u8 *mod, size_t mlen,
+ struct scatterlist *dst, uint *dlen, u32 shift,
+ bool check_base)
+{
+ u32 alen, blen, m, diff, tmp = 0;
+ void __iomem *base = data->base;
+ void __iomem *mem = data->mem;
+ int ret;
+
+ if (slen > mlen) {
+ return -EINVAL;
+ } else if (*dlen < mlen) {
+ *dlen = mlen;
+ return -EINVAL;
+ }
+ alen = elen / 4;
+ blen = mlen / 4;
+ mutex_lock(&data->mutex);
+
+ PKA_SETUP_OP(base, mem, blen, 0,
+ PKA_ARG_ALIGN64(); PKA_ARG_IN_WORDS((const u32 *)exp, alen);
+ PKA_ARG_START(PKA_BPTR); PKA_ARG_IN_WORDS((const u32 *)mod, blen);
+ PKA_ARG_PAD_WORDS(1);
+ PKA_ARG_START(PKA_DPTR);
+ PKA_ARG_START(PKA_APTR); PKA_ARG_GETLOC(m);
+ PKA_ARG_IN_SCATTER_BE(src, slen);
+ PKA_ARG_PAD_WORDS((mlen - slen) / 4);
+ PKA_ARG_PAD_WORDS(1);
+ );
+ if (check_base) {
+ pka_intr_disable(data);
+ PKA_SUBMIT_OP(data, PKA_OP_COMPARE);
+ ret = readl_relaxed_poll_timeout(base + PKA_FUNCTION, tmp,
+ (tmp & PKA_FUNCTION_RUN) == 0,
+ 0, 100);
+ pka_intr_enable(data);
+ if (ret) {
+ /* poll timed out; ret is already -ETIMEDOUT */
+ goto err;
+ }
+ tmp = readl_relaxed(base + PKA_COMPARE);
+ if (!(tmp & PKA_CMP_A_LT_B)) {
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+ writel_relaxed(0, base + PKA_APTR);
+ writel_relaxed(m / 4, base + PKA_CPTR);
+ writel_relaxed(shift, base + PKA_SHIFT);
+ writel_relaxed(alen, base + PKA_ALENGTH);
+ writel_relaxed(blen, base + PKA_BLENGTH);
+
+ PKA_SUBMIT_OP(data, PKA_SEQ_OP_MODEXP);
+ diff = *dlen - blen * 4;
+ ret = pka_op_wait(data);
+ if (ret)
+ goto err;
+ ret = pka_sg_copy_out(mem + m, dst, diff, *dlen, true);
+
+ mutex_unlock(&data->mutex);
+ return ret > 0 ? 0 : ret;
+err:
+ mutex_unlock(&data->mutex);
+ return ret;
+}
diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka_op.h b/drivers/crypto/sa2ul_pka/sa2ul_pka_op.h
new file mode 100644
index 000000000000..6f7c6f07e8bb
--- /dev/null
+++ b/drivers/crypto/sa2ul_pka/sa2ul_pka_op.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * K3 SA2UL Public Key Accelerator driver
+ *
+ * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com
+ *
+ * Author: Daniel Parks <danielrparks@xxxxxx>
+ */
+
+#ifndef PKA_OP_H
+#define PKA_OP_H
+
+#include <linux/scatterlist.h>
+
+#include "sa2ul_pka.h"
+
+int pka_modexp(struct pka_data *data, struct scatterlist *src, uint slen,
+ const u8 *exponent, size_t elen, const u8 *modulus, size_t mlen,
+ struct scatterlist *dst, uint *dlen, u32 shift,
+ bool check_base);
+
+size_t pka_sg_copy_in(void __iomem *dst, struct scatterlist *src, size_t len,
+ bool big);
+size_t pka_sg_copy_out(void __iomem *src, struct scatterlist *dst, size_t skip,
+ size_t len, bool big);
+void *reverse_memcpy(void *dest, const void *src, size_t count);
+
+#endif /* PKA_OP_H */
diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka_rsa.c b/drivers/crypto/sa2ul_pka/sa2ul_pka_rsa.c
new file mode 100644
index 000000000000..48d88372236d
--- /dev/null
+++ b/drivers/crypto/sa2ul_pka/sa2ul_pka_rsa.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * K3 SA2UL Public Key Accelerator driver
+ *
+ * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com
+ *
+ * Author: Daniel Parks <danielrparks@xxxxxx>
+ */
+
+#include <linux/module.h>
+#include <crypto/internal/rsa.h>
+#include <crypto/akcipher.h>
+#include <crypto/internal/akcipher.h>
+
+#include "sa2ul_pka.h"
+#include "sa2ul_pka_op.h"
+
+static int pka_rsa_copy_key_component(const u8 **dst, const u8 *src,
+ size_t *newsz, size_t sz)
+{
+ if (!src)
+ return 0;
+ // strip leading zeroes; check sz first so an empty buffer is not read
+ while (sz > 0 && !*src) {
+ src++;
+ sz--;
+ }
+
+ *newsz = ALIGN(sz, 4);
+
+ if (*newsz > PKA_MAX_VEC) {
+ pr_err("key is longer than maximum supported by PKA\n");
+ return -EINVAL;
+ }
+
+ kfree(*dst);
+ *dst = kmalloc(*newsz, GFP_KERNEL);
+ if (!*dst)
+ return -ENOMEM;
+
+ reverse_memcpy((u8 *)*dst, src, sz);
+ memset((u8 *)*dst + sz, 0, *newsz - sz);
+
+ return 0;
+}
+
+static int pka_rsa_copy_key(struct rsa_key *dst, struct rsa_key *src)
+{
+ int ret;
+
+ ret = pka_rsa_copy_key_component(&dst->n, src->n, &dst->n_sz,
+ src->n_sz);
+ if (ret)
+ goto die;
+ ret = pka_rsa_copy_key_component(&dst->e, src->e, &dst->e_sz,
+ src->e_sz);
+ if (ret)
+ goto die;
+ ret = pka_rsa_copy_key_component(&dst->d, src->d, &dst->d_sz,
+ src->d_sz);
+ if (ret)
+ goto die;
+ ret = pka_rsa_copy_key_component(&dst->p, src->p, &dst->p_sz,
+ src->p_sz);
+ if (ret)
+ goto die;
+ ret = pka_rsa_copy_key_component(&dst->q, src->q, &dst->q_sz,
+ src->q_sz);
+ if (ret)
+ goto die;
+ ret = pka_rsa_copy_key_component(&dst->dp, src->dp, &dst->dp_sz,
+ src->dp_sz);
+ if (ret)
+ goto die;
+ ret = pka_rsa_copy_key_component(&dst->dq, src->dq, &dst->dq_sz,
+ src->dq_sz);
+ if (ret)
+ goto die;
+ ret = pka_rsa_copy_key_component(&dst->qinv, src->qinv, &dst->qinv_sz,
+ src->qinv_sz);
+ if (ret)
+ goto die;
+ return 0;
+die:
+ kfree(dst->n);
+ kfree(dst->e);
+ kfree(dst->d);
+ kfree(dst->p);
+ kfree(dst->q);
+ kfree(dst->dp);
+ kfree(dst->dq);
+ kfree(dst->qinv);
+ /* clear stale pointers so the tfm exit handler cannot double-free */
+ memset(dst, 0, sizeof(*dst));
+ return ret;
+}
+
+static int pka_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
+ unsigned int keylen)
+{
+ struct rsa_key *dst = akcipher_tfm_ctx(tfm);
+ struct rsa_key src = {0};
+ int ret;
+
+ ret = rsa_parse_priv_key(&src, key, keylen);
+ if (ret)
+ return ret;
+ return pka_rsa_copy_key(dst, &src);
+}
+
+static int pka_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key,
+ unsigned int keylen)
+{
+ struct rsa_key *dst = akcipher_tfm_ctx(tfm);
+ struct rsa_key src = {0};
+ int ret;
+
+ ret = rsa_parse_pub_key(&src, key, keylen);
+ if (ret)
+ return ret;
+ ret = pka_rsa_copy_key(dst, &src);
+ return ret;
+}
+
+static int pka_rsa_encrypt(struct akcipher_request *req)
+{
+ struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+ struct pka_data *data = PKA_TFM_DATA(tfm, akcipher);
+ struct rsa_key *key = akcipher_tfm_ctx(tfm);
+
+ // c ≡ m^e (mod n)
+ return pka_modexp(data, req->src, req->src_len, key->e, key->e_sz,
+ key->n, key->n_sz, req->dst, &req->dst_len, 1, true);
+}
+
+static int pka_rsa_decrypt_standard(struct akcipher_request *req)
+{
+ struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+ struct pka_data *data = PKA_TFM_DATA(tfm, akcipher);
+ struct rsa_key *key = akcipher_tfm_ctx(tfm);
+
+ // m ≡ c^d (mod n)
+ return pka_modexp(data, req->src, req->src_len, key->d, key->d_sz,
+ key->n, key->n_sz, req->dst, &req->dst_len, 4, true);
+}
+
+/* in the future, we might use modexp_crt instead of modexp here */
+static int pka_rsa_decrypt(struct akcipher_request *req)
+{
+ return pka_rsa_decrypt_standard(req);
+}
+
+static unsigned int pka_rsa_max_size(struct crypto_akcipher *tfm)
+{
+ struct rsa_key *key = akcipher_tfm_ctx(tfm);
+
+ return key->n_sz;
+}
+
+static void pka_rsa_exit_tfm(struct crypto_akcipher *tfm)
+{
+ struct rsa_key *key = akcipher_tfm_ctx(tfm);
+
+ kfree(key->n);
+ kfree(key->e);
+ kfree(key->d);
+ kfree(key->p);
+ kfree(key->q);
+ kfree(key->dp);
+ kfree(key->dq);
+ kfree(key->qinv);
+}
+
+struct pka_alg_template pka_rsa = {
+ .alg.akcipher = {
+ .encrypt = pka_rsa_encrypt,
+ .decrypt = pka_rsa_decrypt,
+ .set_priv_key = pka_rsa_set_priv_key,
+ .set_pub_key = pka_rsa_set_pub_key,
+ .max_size = pka_rsa_max_size,
+ .exit = pka_rsa_exit_tfm,
+ .base = {
+ .cra_name = "rsa",
+ .cra_driver_name = "rsa-sa2ul-pka",
+ .cra_priority = 400,
+ .cra_module = THIS_MODULE,
+ .cra_ctxsize = sizeof(struct rsa_key),
+ },
+ }
+};
diff --git a/drivers/crypto/sa2ul_pka/sa2ul_pka_sg.c b/drivers/crypto/sa2ul_pka/sa2ul_pka_sg.c
new file mode 100644
index 000000000000..0b8996a1ca8d
--- /dev/null
+++ b/drivers/crypto/sa2ul_pka/sa2ul_pka_sg.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * K3 SA2UL Public Key Accelerator driver
+ *
+ * Copyright (c) 2022 Texas Instruments Incorporated - https://www.ti.com
+ *
+ * Author: Daniel Parks <danielrparks@xxxxxx>
+ */
+
+#include <asm/unaligned.h>
+
+#include "sa2ul_pka_op.h"
+
+/* align @a and advance @b by the same amount as @a
+ * mod must be a power of two
+ */
+#define ALIGN_ADVANCE(a, b, mod) do { \
+ (b) += ALIGN((a), (mod)) - (a); \
+ (a) = ALIGN((a), (mod)); \
+} while (0)
+/* down-align @a and advance @b by the amount @a would be aligned *upwards*
+ * mod must be a power of two
+ */
+#define ALIGN_DOWN_ADVANCE(a, b, mod) do { \
+ (b) += ALIGN((a), (mod)) - (a); \
+ (a) = ALIGN_DOWN((a), (mod)); \
+} while (0)
+/* reverse a word index @idx with respect to length @len */
+#define REV_IDX(idx, len) ((len) - (idx) - 4)
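+/* e.g. REV_IDX(0, 16) == 12: the first (most significant) word of a
+ * big-endian number lands in the last word of the little-endian vector
+ */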
+
+struct temp_buffer {
+ u32 buf;
+ bool dirty;
+};
+
+/* if we are in the middle of a word in the destination, copy the remainder of
+ * the word from the source and write it out
+ */
+static inline void in_fragment_begin(size_t *dsti, size_t *miteri,
+ struct sg_mapping_iter *miter,
+ void __iomem *dst, size_t len,
+ struct temp_buffer *buf, bool big)
+{
+ if (*dsti % 4 && *miteri + 4 - *dsti % 4 <= miter->length &&
+ *dsti < len) {
+ memcpy(((u8 *)&buf->buf) + *dsti % 4, miter->addr + *miteri,
+ 4 - *dsti % 4);
+ ALIGN_DOWN_ADVANCE(*dsti, *miteri, 4);
+ if (big)
+ writel_relaxed(be32_to_cpu(buf->buf),
+ dst + REV_IDX(*dsti, len));
+ else
+ writel_relaxed(le32_to_cpu(buf->buf), dst + *dsti);
+ buf->dirty = false;
+ buf->buf = 0;
+ *dsti += 4;
+ }
+}
+
+/* if the source sg entry ends in the middle of a word, save the partial word */
+static inline void in_fragment_end(size_t *dsti, size_t *miteri,
+ struct sg_mapping_iter *miter, size_t len,
+ struct temp_buffer *buf)
+{
+ u32 tmp;
+
+ if (*miteri < miter->length && *dsti < len) {
+ tmp = miter->length - *miteri;
+ memcpy((u8 *)&buf->buf + *dsti % 4, miter->addr + *miteri, tmp);
+ buf->dirty = true;
+ *dsti += tmp;
+ }
+}
+
+/* if we have a partial word at the end of the source, zero pad and write it out
+ */
+static inline void in_fragment_last(size_t *dsti, void __iomem *dst, size_t len,
+ struct temp_buffer *buf, bool big)
+{
+ if (buf->dirty) {
+ *dsti = ALIGN_DOWN(*dsti, 4);
+ if (big)
+ writel_relaxed(be32_to_cpu(buf->buf),
+ dst + REV_IDX(*dsti, len));
+ else
+ writel_relaxed(le32_to_cpu(buf->buf), dst + *dsti);
+ buf->dirty = false;
+ buf->buf = 0;
+ }
+}
+
+/* copy words into the destination */
+static inline void in_loop(size_t *dsti, size_t *miteri,
+ struct sg_mapping_iter *miter, void __iomem *dst,
+ size_t len, bool big)
+{
+ u32 tmp;
+
+ while (*miteri + 4 <= miter->length && *dsti + 4 <= len) {
+ if (big) {
+ tmp = be32_to_cpu(*((u32 *)(miter->addr + *miteri)));
+ writel_relaxed(tmp, dst + REV_IDX(*dsti, len));
+ } else {
+ tmp = le32_to_cpu(*((u32 *)(miter->addr + *miteri)));
+ writel_relaxed(tmp, dst + *dsti);
+ }
+ *dsti += 4;
+ *miteri += 4;
+ }
+}
+
+/* copy words into the destination from an unaligned source */
+static inline void in_loop_ua(size_t *dsti, size_t *miteri,
+ struct sg_mapping_iter *miter, void __iomem *dst,
+ size_t len, bool big)
+{
+ u32 tmp;
+
+ while (*miteri + 4 <= miter->length && *dsti + 4 <= len) {
+ if (big) {
+ tmp = be32_to_cpu(get_unaligned(
+ (u32 *)(miter->addr + *miteri)));
+ writel_relaxed(tmp, dst + REV_IDX(*dsti, len));
+ } else {
+ tmp = le32_to_cpu(get_unaligned(
+ (u32 *)(miter->addr + *miteri)));
+ writel_relaxed(tmp, dst + *dsti);
+ }
+ *dsti += 4;
+ *miteri += 4;
+ }
+}
+
+/**
+ * pka_sg_copy_in - copy data from a scatterlist into PKA memory
+ * @dst: destination (in PKA device memory)
+ * @src: source scatterlist
+ * @len: length of the data to copy
+ * @big: true if source data is big-endian
+ *
+ * Description:
+ * Intended for copying big integers used in crypto algorithms. In this case,
+ * the endianness of the memory will be defined by the storage format (e.g.
+ * ASN.1) rather than the host system. This function performs the copy without
+ * using costly bounce buffers.
+ * The PKA memory is always little-endian; if necessary, the endianness will
+ * be swapped on-the-fly during the copy to the PKA.
+ * If the scatterlist ends before @len bytes have been copied, this function
+ * will stop early.
+ *
+ * Return:
+ * The number of bytes copied.
+ */
+size_t pka_sg_copy_in(void __iomem *dst, struct scatterlist *src, size_t len,
+ bool big)
+{
+ size_t dsti = 0, miteri;
+ struct sg_mapping_iter miter;
+ struct temp_buffer buf = {0, false};
+
+ if (big && len % 4)
+ ALIGN_ADVANCE(len, dsti, 4);
+
+ sg_miter_start(&miter, src, sg_nents(src), SG_MITER_FROM_SG);
+ while (sg_miter_next(&miter) && dsti < len) {
+ miteri = 0;
+ in_fragment_begin(&dsti, &miteri, &miter, dst, len, &buf, big);
+ if (IS_ALIGNED((unsigned long)miter.addr + miteri, 4))
+ in_loop(&dsti, &miteri, &miter, dst, len, big);
+ else
+ in_loop_ua(&dsti, &miteri, &miter, dst, len, big);
+ in_fragment_end(&dsti, &miteri, &miter, len, &buf);
+ }
+ in_fragment_last(&dsti, dst, len, &buf, big);
+ sg_miter_stop(&miter);
+ return dsti;
+}
+
+/* if we are in the middle of a word in the source, write the remainder of the
+ * word to the destination
+ */
+static inline void out_fragment_begin(size_t *srci, size_t *miteri,
+ struct sg_mapping_iter *miter,
+ void __iomem *src, size_t len, u32 *buf,
+ bool big)
+{
+ void __iomem *addr;
+
+ if (*srci % 4 && *miteri + 4 - *srci % 4 <= miter->length &&
+ *srci < len) {
+ if (big) {
+ addr = src + REV_IDX(ALIGN_DOWN(*srci, 4), len);
+ *buf = cpu_to_be32(readl_relaxed(addr));
+ }
+ memcpy(miter->addr + *miteri, ((u8 *)buf) + *srci % 4,
+ 4 - *srci % 4);
+ ALIGN_ADVANCE(*srci, *miteri, 4);
+ }
+}
+
+/* if the destination sg entry ends in the middle of a word, write what we can
+ * and save the rest
+ */
+static inline void out_fragment_end(size_t *srci, size_t *miteri,
+ struct sg_mapping_iter *miter,
+ void __iomem *src, size_t len, u32 *buf,
+ bool big)
+{
+ u32 tmp;
+ void __iomem *addr;
+
+ if (*miteri < miter->length && *srci < len) {
+ tmp = min(miter->length - *miteri, len - *srci);
+ if (big) {
+ addr = src + REV_IDX(*srci, len);
+ *buf = cpu_to_be32(readl_relaxed(addr));
+ } else {
+ *buf = cpu_to_le32(readl_relaxed(src + *srci));
+ }
+ memcpy(miter->addr + *miteri, (u8 *)buf + *srci % 4, tmp);
+ *srci += tmp;
+ }
+}
+
+/* copy words from the source to the destination */
+static inline void out_loop(size_t *srci, size_t *miteri,
+ struct sg_mapping_iter *miter, void __iomem *src,
+ size_t len, bool big)
+{
+ u32 tmp;
+ void __iomem *addr;
+
+ while (*miteri + 4 <= miter->length && *srci + 4 <= len) {
+ if (big) {
+ addr = src + REV_IDX(*srci, len);
+ tmp = cpu_to_be32(readl_relaxed(addr));
+ *((u32 *)(miter->addr + *miteri)) = tmp;
+ } else {
+ tmp = cpu_to_le32(readl_relaxed(src + *srci));
+ *((u32 *)(miter->addr + *miteri)) = tmp;
+ }
+ *srci += 4;
+ *miteri += 4;
+ }
+}
+
+/* copy words from the source to an unaligned destination */
+static inline void out_loop_ua(size_t *srci, size_t *miteri,
+ struct sg_mapping_iter *miter, void __iomem *src,
+ size_t len, bool big)
+{
+ u32 tmp;
+ void __iomem *addr;
+
+ while (*miteri + 4 <= miter->length && *srci + 4 <= len) {
+ if (big) {
+ addr = src + REV_IDX(*srci, len);
+ tmp = cpu_to_be32(readl_relaxed(addr));
+ put_unaligned(tmp, (u32 *)(miter->addr + *miteri));
+ } else {
+ tmp = cpu_to_le32(readl_relaxed(src + *srci));
+ put_unaligned(tmp, (u32 *)(miter->addr + *miteri));
+ }
+ *srci += 4;
+ *miteri += 4;
+ }
+}
+
+/**
+ * pka_sg_copy_out - copy data from PKA memory into a scatterlist
+ * @src: source (in PKA device memory)
+ * @dst: destination scatterlist
+ * @skip: number of bytes to skip in the destination before copying
+ * @len: length of the data to copy
+ * @big: true if destination data is big endian
+ *
+ * Description:
+ * Intended for copying big integers used in crypto algorithms. In this case,
+ * the endianness of the memory will be defined by the storage format (e.g.
+ * ASN.1) rather than the host system. This function performs the copy without
+ * using costly bounce buffers.
+ * The PKA memory is always little-endian; if necessary, the endianness will
+ * be swapped on-the-fly during the copy from the PKA.
+ * If the scatterlist ends before @len bytes have been copied, this function
+ * will stop early.
+ *
+ * Return:
+ * The number of bytes copied.
+ */
+size_t pka_sg_copy_out(void __iomem *src, struct scatterlist *dst, size_t skip,
+ size_t len, bool big)
+{
+ size_t srci = 0, miteri;
+ struct sg_mapping_iter miter;
+ u32 buf = 0;
+
+ len -= skip;
+ sg_miter_start(&miter, dst, sg_nents(dst), SG_MITER_TO_SG);
+ sg_zero_buffer(dst, sg_nents(dst), skip, 0);
+ if (!sg_miter_skip(&miter, skip))
+ return 0;
+ if (big && len % 4)
+ ALIGN_ADVANCE(len, srci, 4);
+
+ while (sg_miter_next(&miter) && srci < len) {
+ miteri = 0;
+ out_fragment_begin(&srci, &miteri, &miter, src, len, &buf, big);
+ if (IS_ALIGNED((unsigned long)miter.addr + miteri, 4))
+ out_loop(&srci, &miteri, &miter, src, len, big);
+ else
+ out_loop_ua(&srci, &miteri, &miter, src, len, big);
+ out_fragment_end(&srci, &miteri, &miter, src, len, &buf, big);
+ }
+ sg_miter_stop(&miter);
+ return srci;
+}
--
2.17.1