Re: [PATCH v7 12/14] crypto: iaa - Add support for deflate-iaa compression algorithm

From: Rex Zhang
Date: Sun Jul 16 2023 - 22:12:53 EST


Hi, Tom,

On 2023-07-10 at 14:06:52 -0500, Tom Zanussi wrote:
> This patch registers the deflate-iaa deflate compression algorithm and
> hooks it up to the IAA hardware using the 'fixed' compression mode
> introduced in the previous patch.
>
> Because the IAA hardware has a 4k history-window limitation, only
> buffers <= 4k, or that have been compressed using a <= 4k history
> window, are technically compliant with the deflate spec, which allows
> for a window of up to 32k. Because of this limitation, the IAA fixed
> mode deflate algorithm is given its own algorithm name, 'deflate-iaa'.
>
> With this change, the deflate-iaa crypto algorithm is registered and
> operational, and compression and decompression operations are fully
> enabled following the successful binding of the first IAA workqueue
> to the iaa_crypto sub-driver.
>
> When there are no IAA workqueues bound to the driver, the IAA crypto
> algorithm can be unregistered by removing the module.
>
> A new iaa_crypto 'verify_compress' driver attribute is also added,
> allowing the user to toggle compression verification. If set, each
> compress will be internally decompressed and the contents verified,
> returning error codes if unsuccessful. This can be toggled with 0/1:
>
> echo 0 > /sys/bus/dsa/drivers/crypto/verify_compress
>
> The default setting is '1' - verify all compresses.
>
> The verify_compress value setting at the time the algorithm is
> registered is captured in the algorithm's crypto_ctx and used for all
> compresses when using the algorithm.
>
> [ Based on work originally by George Powley, Jing Lin and Kyung Min
> Park ]
>
> Signed-off-by: Tom Zanussi <tom.zanussi@xxxxxxxxxxxxxxx>
> ---
> crypto/testmgr.c | 10 +
> drivers/crypto/intel/iaa/iaa_crypto.h | 36 +
> drivers/crypto/intel/iaa/iaa_crypto_main.c | 921 ++++++++++++++++++++-
> 3 files changed, 950 insertions(+), 17 deletions(-)
>
> diff --git a/crypto/testmgr.c b/crypto/testmgr.c
> index 216878c8bc3d..b6d924e0ff59 100644
> --- a/crypto/testmgr.c
> +++ b/crypto/testmgr.c
> @@ -4819,6 +4819,16 @@ static const struct alg_test_desc alg_test_descs[] = {
> .decomp = __VECS(deflate_decomp_tv_template)
> }
> }
> + }, {
> + .alg = "deflate-iaa",
> + .test = alg_test_comp,
> + .fips_allowed = 1,
> + .suite = {
> + .comp = {
> + .comp = __VECS(deflate_comp_tv_template),
> + .decomp = __VECS(deflate_decomp_tv_template)
> + }
> + }
> }, {
> .alg = "dh",
> .test = alg_test_kpp,
> diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h
> index 33e68f9d3d02..4c6b0f5a6b50 100644
> --- a/drivers/crypto/intel/iaa/iaa_crypto.h
> +++ b/drivers/crypto/intel/iaa/iaa_crypto.h
> @@ -10,15 +10,42 @@
>
> #define IDXD_SUBDRIVER_NAME "crypto"
>
> +#define IAA_DECOMP_ENABLE BIT(0)
> +#define IAA_DECOMP_FLUSH_OUTPUT BIT(1)
> +#define IAA_DECOMP_CHECK_FOR_EOB BIT(2)
> +#define IAA_DECOMP_STOP_ON_EOB BIT(3)
> +#define IAA_DECOMP_SUPPRESS_OUTPUT BIT(9)
> +
> +#define IAA_COMP_FLUSH_OUTPUT BIT(1)
> +#define IAA_COMP_APPEND_EOB BIT(2)
> +
> +#define IAA_COMPLETION_TIMEOUT 1000000
> +
> +#define IAA_ANALYTICS_ERROR 0x0a
> +#define IAA_ERROR_DECOMP_BUF_OVERFLOW 0x0b
> +#define IAA_ERROR_COMP_BUF_OVERFLOW 0x19
> +#define IAA_ERROR_WATCHDOG_EXPIRED 0x24
> +
> #define IAA_COMP_MODES_MAX 2
>
> #define FIXED_HDR 0x2
> #define FIXED_HDR_SIZE 3
>
> +#define IAA_COMP_FLAGS (IAA_COMP_FLUSH_OUTPUT | \
> + IAA_COMP_APPEND_EOB)
> +
> +#define IAA_DECOMP_FLAGS (IAA_DECOMP_ENABLE | \
> + IAA_DECOMP_FLUSH_OUTPUT | \
> + IAA_DECOMP_CHECK_FOR_EOB | \
> + IAA_DECOMP_STOP_ON_EOB)
> +
> /* Representation of IAA workqueue */
> struct iaa_wq {
> struct list_head list;
> +
> struct idxd_wq *wq;
> + int ref;
> + bool remove;
>
> struct iaa_device *iaa_device;
> };
> @@ -119,4 +146,13 @@ int add_iaa_compression_mode(const char *name,
>
> void remove_iaa_compression_mode(const char *name);
>
> +enum iaa_mode {
> + IAA_MODE_FIXED,
> +};
> +
> +struct iaa_compression_ctx {
> + enum iaa_mode mode;
> + bool verify_compress;
> +};
> +
> #endif
> diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c
> index 0c59332456f0..9b4acc343582 100644
> --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
> +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
> @@ -10,6 +10,7 @@
> #include <uapi/linux/idxd.h>
> #include <linux/highmem.h>
> #include <linux/sched/smt.h>
> +#include <crypto/internal/acompress.h>
>
> #include "idxd.h"
> #include "iaa_crypto.h"
> @@ -32,6 +33,20 @@ static unsigned int cpus_per_iaa;
> /* Per-cpu lookup table for balanced wqs */
> static struct wq_table_entry __percpu *wq_table;
>
> +static struct idxd_wq *wq_table_next_wq(int cpu)
> +{
> + struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
> +
> + if (++entry->cur_wq >= entry->n_wqs)
> + entry->cur_wq = 0;
> +
> + pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
> + entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
> + entry->wqs[entry->cur_wq]->id, cpu);
> +
> + return entry->wqs[entry->cur_wq];
> +}
> +
> static void wq_table_add(int cpu, struct idxd_wq *wq)
> {
> struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
> @@ -66,6 +81,40 @@ static void wq_table_clear_entry(int cpu)
> static LIST_HEAD(iaa_devices);
> static DEFINE_MUTEX(iaa_devices_lock);
>
> +/* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
> +static bool iaa_crypto_enabled;
> +static bool iaa_crypto_registered;
> +
> +/* Verify results of IAA compress or not */
> +static bool iaa_verify_compress = true;
> +
> +static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
> +{
> + return sprintf(buf, "%d\n", iaa_verify_compress);
> +}
> +
> +static ssize_t verify_compress_store(struct device_driver *driver,
> + const char *buf, size_t count)
> +{
> + int ret = -EBUSY;
> +
> + mutex_lock(&iaa_devices_lock);
> +
> + if (iaa_crypto_enabled)
> + goto out;
> +
> + ret = kstrtobool(buf, &iaa_verify_compress);
> + if (ret)
> + goto out;
> +
> + ret = count;
> +out:
> + mutex_unlock(&iaa_devices_lock);
> +
> + return ret;
> +}
> +static DRIVER_ATTR_RW(verify_compress);
> +
> static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];
>
> static int find_empty_iaa_compression_mode(void)
> @@ -250,6 +299,12 @@ int add_iaa_compression_mode(const char *name,
> }
> EXPORT_SYMBOL_GPL(add_iaa_compression_mode);
>
> +static struct iaa_device_compression_mode *
> +get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
> +{
> + return iaa_device->compression_modes[idx];
> +}
> +
> static void free_device_compression_mode(struct iaa_device *iaa_device,
> struct iaa_device_compression_mode *device_mode)
> {
> @@ -268,6 +323,86 @@ static void free_device_compression_mode(struct iaa_device *iaa_device,
> kfree(device_mode);
> }
>
> +#define IDXD_OP_FLAG_AECS_RW_TGLS 0x400000
> +#define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
> +#define IAX_AECS_COMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
> +#define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
> +#define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
> + IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
> + IDXD_OP_FLAG_AECS_RW_TGLS)
> +
> +static int check_completion(struct device *dev,
> + struct iax_completion_record *comp,
> + bool compress,
> + bool only_once);
> +
> +static int decompress_header(struct iaa_device_compression_mode *device_mode,
> + struct iaa_compression_mode *mode,
> + struct idxd_wq *wq)
> +{
> + dma_addr_t src_addr, src2_addr;
> + struct idxd_desc *idxd_desc;
> + struct iax_hw_desc *desc;
> + struct device *dev;
> + int ret = 0;
> +
> + idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
> + if (IS_ERR(idxd_desc))
> + return PTR_ERR(idxd_desc);
> +
> + desc = idxd_desc->iax_hw;
> +
> + dev = &wq->idxd->pdev->dev;
> +
> + src_addr = dma_map_single(dev, (void *)mode->header_table,
> + mode->header_table_size, DMA_TO_DEVICE);
> + dev_dbg(dev, "%s: mode->name %s, src_addr %llx, dev %p, src %p, slen %d\n",
> + __func__, mode->name, src_addr, dev,
> + mode->header_table, mode->header_table_size);
> + if (unlikely(dma_mapping_error(dev, src_addr))) {
> + dev_dbg(dev, "dma_map_single err, exiting\n");
> + ret = -ENOMEM;
> + return ret;
> + }
> +
> + desc->flags = IAX_AECS_GEN_FLAG;
> + desc->opcode = IAX_OPCODE_DECOMPRESS;
> +
> + desc->src1_addr = (u64)src_addr;
> + desc->src1_size = mode->header_table_size;
> +
> + src2_addr = device_mode->aecs_decomp_table_dma_addr;
> + desc->src2_addr = (u64)src2_addr;
> + desc->src2_size = 1088;
> + dev_dbg(dev, "%s: mode->name %s, src2_addr %llx, dev %p, src2_size %d\n",
> + __func__, mode->name, desc->src2_addr, dev, desc->src2_size);
> + desc->max_dst_size = 0; // suppressed output
> +
> + desc->decompr_flags = mode->gen_decomp_table_flags;
> +
> + desc->priv = 1;
> +
> + desc->completion_addr = idxd_desc->compl_dma;
> +
> + ret = idxd_submit_desc(wq, idxd_desc);
> + if (ret) {
> + pr_err("%s: submit_desc failed ret=0x%x\n", __func__, ret);
> + goto out;
> + }
> +
> + ret = check_completion(dev, idxd_desc->iax_completion, false, false);
> + if (ret)
> + dev_dbg(dev, "%s: mode->name %s check_completion failed ret=%d\n",
> + __func__, mode->name, ret);
> + else
> + dev_dbg(dev, "%s: mode->name %s succeeded\n", __func__,
> + mode->name);
> +out:
> + dma_unmap_single(dev, src_addr, 1088, DMA_TO_DEVICE);
> +
> + return ret;
> +}
> +
> static int init_device_compression_mode(struct iaa_device *iaa_device,
> struct iaa_compression_mode *mode,
> int idx, struct idxd_wq *wq)
> @@ -300,6 +435,14 @@ static int init_device_compression_mode(struct iaa_device *iaa_device,
> memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
> memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);
>
> + if (mode->header_table) {
> + ret = decompress_header(device_mode, mode, wq);
> + if (ret) {
> + pr_debug("iaa header decompression failed: ret=%d\n", ret);
> + goto free;
> + }
> + }
> +
> if (mode->init) {
> ret = mode->init(device_mode);
> if (ret)
> @@ -372,18 +515,6 @@ static struct iaa_device *iaa_device_alloc(void)
> return iaa_device;
> }
>
> -static void iaa_device_free(struct iaa_device *iaa_device)
> -{
> - struct iaa_wq *iaa_wq, *next;
> -
> - list_for_each_entry_safe(iaa_wq, next, &iaa_device->wqs, list) {
> - list_del(&iaa_wq->list);
> - kfree(iaa_wq);
> - }
> -
> - kfree(iaa_device);
> -}
> -
> static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
> {
> struct iaa_wq *iaa_wq;
> @@ -426,12 +557,8 @@ static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
>
> static void del_iaa_device(struct iaa_device *iaa_device)
> {
> - remove_device_compression_modes(iaa_device);
> -
> list_del(&iaa_device->list);
>
> - iaa_device_free(iaa_device);
> -
> nr_iaa--;
> }
>
> @@ -497,6 +624,82 @@ static void clear_wq_table(void)
> pr_debug("cleared wq table\n");
> }
>
> +static void free_iaa_device(struct iaa_device *iaa_device)
> +{
> + if (!iaa_device)
> + return;
> +
> + remove_device_compression_modes(iaa_device);
> + kfree(iaa_device);
> +}
> +
> +static void __free_iaa_wq(struct iaa_wq *iaa_wq)
> +{
> + struct iaa_device *iaa_device;
> +
> + if (!iaa_wq)
> + return;
> +
> + iaa_device = iaa_wq->iaa_device;
> + if (iaa_device->n_wq == 0)
> + free_iaa_device(iaa_wq->iaa_device);
> +}
> +
> +static void free_iaa_wq(struct iaa_wq *iaa_wq)
> +{
> + struct idxd_wq *wq;
> +
> + __free_iaa_wq(iaa_wq);
> +
> + wq = iaa_wq->wq;
> +
> + kfree(iaa_wq);
> + idxd_wq_set_private(wq, NULL);
> +}
> +
> +static int iaa_wq_get(struct idxd_wq *wq)
> +{
> + struct idxd_device *idxd = wq->idxd;
> + struct iaa_wq *iaa_wq;
> + int ret = 0;
> +
> + spin_lock(&idxd->dev_lock);
> + iaa_wq = idxd_wq_get_private(wq);
> + if (iaa_wq && !iaa_wq->remove)
> + iaa_wq->ref++;
> + else
> + ret = -ENODEV;
> + spin_unlock(&idxd->dev_lock);
> +
> + return ret;
> +}
> +
> +static int iaa_wq_put(struct idxd_wq *wq)
> +{
> + struct idxd_device *idxd = wq->idxd;
> + struct iaa_wq *iaa_wq;
> + bool free = false;
> + int ret = 0;
> +
> + spin_lock(&idxd->dev_lock);
> + iaa_wq = idxd_wq_get_private(wq);
> + if (iaa_wq) {
> + iaa_wq->ref--;
> + if (iaa_wq->ref == 0 && iaa_wq->remove) {
> + __free_iaa_wq(iaa_wq);
> + idxd_wq_set_private(wq, NULL);
> + free = true;
> + }
> + } else {
> + ret = -ENODEV;
> + }
> + spin_unlock(&idxd->dev_lock);
> + if (free)
> + kfree(iaa_wq);
> +
> + return ret;
> +}
> +
> static void free_wq_table(void)
> {
> int cpu;
> @@ -580,6 +783,7 @@ static int save_iaa_wq(struct idxd_wq *wq)
> ret = add_iaa_wq(new_device, wq, &new_wq);
> if (ret) {
> del_iaa_device(new_device);
> + free_iaa_device(new_device);
> goto out;
> }
>
> @@ -587,6 +791,7 @@ static int save_iaa_wq(struct idxd_wq *wq)
> if (ret) {
> del_iaa_wq(new_device, new_wq->wq);
> del_iaa_device(new_device);
> + free_iaa_wq(new_wq);
> goto out;
> }
> }
> @@ -724,6 +929,624 @@ static void rebalance_wq_table(void)
> }
> }
>
> +static inline int check_completion(struct device *dev,
> + struct iax_completion_record *comp,
> + bool compress,
> + bool only_once)
> +{
> + char *op_str = compress ? "compress" : "decompress";
> + int ret = 0;
> +
> + while (!comp->status) {
> + if (only_once)
> + return -EAGAIN;
> + cpu_relax();
> + }
> +
> + if (comp->status != IAX_COMP_SUCCESS) {
> + if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
> + ret = -ETIMEDOUT;
> + dev_dbg(dev, "%s timed out, size=0x%x\n",
> + op_str, comp->output_size);
> + goto out;
> + }
> +
> + if (comp->status == IAA_ANALYTICS_ERROR &&
> + comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
> + ret = -E2BIG;
> + dev_dbg(dev, "compressed > uncompressed size,"
> + " not compressing, size=0x%x\n",
> + comp->output_size);
> + goto out;
> + }
> +
> + if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
> + ret = -EOVERFLOW;
> + goto out;
> + }
> +
> + ret = -EINVAL;
> + dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
> + op_str, comp->status, comp->error_code, comp->output_size);
> + print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
> +
> + goto out;
> + }
> +out:
> + return ret;
> +}
> +
> +static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
> + struct idxd_wq *wq,
> + dma_addr_t src_addr, unsigned int slen,
> + dma_addr_t dst_addr, unsigned int *dlen,
> + u32 *compression_crc,
> + bool disable_async)
> +{
> + struct iaa_device_compression_mode *active_compression_mode;
> + struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
> + struct iaa_device *iaa_device;
> + struct idxd_desc *idxd_desc;
> + struct iax_hw_desc *desc;
> + struct idxd_device *idxd;
> + struct iaa_wq *iaa_wq;
> + struct pci_dev *pdev;
> + struct device *dev;
> + int ret = 0;
> +
> + iaa_wq = idxd_wq_get_private(wq);
> + iaa_device = iaa_wq->iaa_device;
> + idxd = iaa_device->idxd;
> + pdev = idxd->pdev;
> + dev = &pdev->dev;
> +
> + active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
> +
> + idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
> + if (IS_ERR(idxd_desc)) {
> + dev_dbg(dev, "idxd descriptor allocation failed\n");
> + dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
> + return PTR_ERR(idxd_desc);
> + }
> + desc = idxd_desc->iax_hw;
> +
> + desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
> + IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
> + desc->opcode = IAX_OPCODE_COMPRESS;
> + desc->compr_flags = IAA_COMP_FLAGS;
> + desc->priv = 1;
> +
> + desc->src1_addr = (u64)src_addr;
> + desc->src1_size = slen;
> + desc->dst_addr = (u64)dst_addr;
> + desc->max_dst_size = *dlen;
> + desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
> + desc->src2_size = sizeof(struct aecs_comp_table_record);
> + desc->completion_addr = idxd_desc->compl_dma;
> +
> + dev_dbg(dev, "%s: compression mode %s,"
> + " desc->src1_addr %llx, desc->src1_size %d,"
> + " desc->dst_addr %llx, desc->max_dst_size %d,"
> + " desc->src2_addr %llx, desc->src2_size %d\n", __func__,
> + active_compression_mode->name,
> + desc->src1_addr, desc->src1_size, desc->dst_addr,
> + desc->max_dst_size, desc->src2_addr, desc->src2_size);
> +
> + ret = idxd_submit_desc(wq, idxd_desc);
> + if (ret) {
> + dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
> + goto err;
> + }
> +
> + ret = check_completion(dev, idxd_desc->iax_completion, true, false);
> + if (ret) {
> + dev_dbg(dev, "check_completion failed ret=%d\n", ret);
> + goto err;
> + }
> +
> + *dlen = idxd_desc->iax_completion->output_size;
> +
> + *compression_crc = idxd_desc->iax_completion->crc;
> +
> + idxd_free_desc(wq, idxd_desc);
> +out:
> + return ret;
> +err:
> + idxd_free_desc(wq, idxd_desc);
> + dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
> +
> + goto out;
> +}
> +
> +static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
> + struct idxd_wq *wq,
> + dma_addr_t src_addr, unsigned int slen,
> + dma_addr_t dst_addr, unsigned int *dlen,
> + u32 compression_crc)
> +{
> + struct iaa_device_compression_mode *active_compression_mode;
> + struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
> + struct iaa_device *iaa_device;
> + struct idxd_desc *idxd_desc;
> + struct iax_hw_desc *desc;
> + struct idxd_device *idxd;
> + struct iaa_wq *iaa_wq;
> + struct pci_dev *pdev;
> + struct device *dev;
> + int ret = 0;
> +
> + iaa_wq = idxd_wq_get_private(wq);
> + iaa_device = iaa_wq->iaa_device;
> + idxd = iaa_device->idxd;
> + pdev = idxd->pdev;
> + dev = &pdev->dev;
> +
> + active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
> +
> + idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
> + if (IS_ERR(idxd_desc)) {
> + dev_dbg(dev, "idxd descriptor allocation failed\n");
> + dev_dbg(dev, "iaa compress failed: ret=%ld\n",
> + PTR_ERR(idxd_desc));
> + return PTR_ERR(idxd_desc);
> + }
> + desc = idxd_desc->iax_hw;
> +
> + /* Verify (optional) - decompress and check crc, suppress dest write */
> +
> + desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
> + desc->opcode = IAX_OPCODE_DECOMPRESS;
> + desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
> + desc->priv = 1;
> +
> + desc->src1_addr = (u64)dst_addr;
> + desc->src1_size = *dlen;
> + desc->dst_addr = (u64)src_addr;
> + desc->max_dst_size = slen;
> + desc->completion_addr = idxd_desc->compl_dma;
> +
> + dev_dbg(dev, "(verify) compression mode %s,"
> + " desc->src1_addr %llx, desc->src1_size %d,"
> + " desc->dst_addr %llx, desc->max_dst_size %d,"
> + " desc->src2_addr %llx, desc->src2_size %d\n",
> + active_compression_mode->name,
> + desc->src1_addr, desc->src1_size, desc->dst_addr,
> + desc->max_dst_size, desc->src2_addr, desc->src2_size);
> +
> + ret = idxd_submit_desc(wq, idxd_desc);
> + if (ret) {
> + dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
> + goto err;
> + }
> +
> + ret = check_completion(dev, idxd_desc->iax_completion, false, false);
> + if (ret) {
> + dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
> + goto err;
> + }
> +
> + if (compression_crc != idxd_desc->iax_completion->crc) {
> + ret = -EINVAL;
> + dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
> + " comp=0x%x, decomp=0x%x\n", compression_crc,
> + idxd_desc->iax_completion->crc);
> + print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
> + 8, 1, idxd_desc->iax_completion, 64, 0);
> + goto err;
> + }
> +
> + idxd_free_desc(wq, idxd_desc);
> +out:
> + return ret;
> +err:
> + idxd_free_desc(wq, idxd_desc);
> + dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
> +
> + goto out;
> +}
> +
> +static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
> + struct idxd_wq *wq,
> + dma_addr_t src_addr, unsigned int slen,
> + dma_addr_t dst_addr, unsigned int *dlen,
> + bool disable_async)
> +{
> + struct iaa_device_compression_mode *active_compression_mode;
> + struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
> + struct iaa_device *iaa_device;
> + struct idxd_desc *idxd_desc;
> + struct iax_hw_desc *desc;
> + struct idxd_device *idxd;
> + struct iaa_wq *iaa_wq;
> + struct pci_dev *pdev;
> + struct device *dev;
> + int ret = 0;
> +
> + iaa_wq = idxd_wq_get_private(wq);
> + iaa_device = iaa_wq->iaa_device;
> + idxd = iaa_device->idxd;
> + pdev = idxd->pdev;
> + dev = &pdev->dev;
> +
> + active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
> +
> + idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
> + if (IS_ERR(idxd_desc)) {
> + dev_dbg(dev, "idxd descriptor allocation failed\n");
> + dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
> + PTR_ERR(idxd_desc));
> + return PTR_ERR(idxd_desc);
> + }
> + desc = idxd_desc->iax_hw;
> +
> + desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
> + desc->opcode = IAX_OPCODE_DECOMPRESS;
> + desc->max_dst_size = PAGE_SIZE;
> + desc->decompr_flags = IAA_DECOMP_FLAGS;
> + desc->priv = 1;
> +
> + desc->src1_addr = (u64)src_addr;
> + desc->dst_addr = (u64)dst_addr;
> + desc->max_dst_size = *dlen;
> + desc->src1_size = slen;
> + desc->completion_addr = idxd_desc->compl_dma;
> +
> + dev_dbg(dev, "%s: decompression mode %s,"
> + " desc->src1_addr %llx, desc->src1_size %d,"
> + " desc->dst_addr %llx, desc->max_dst_size %d,"
> + " desc->src2_addr %llx, desc->src2_size %d\n", __func__,
> + active_compression_mode->name,
> + desc->src1_addr, desc->src1_size, desc->dst_addr,
> + desc->max_dst_size, desc->src2_addr, desc->src2_size);
> +
> + ret = idxd_submit_desc(wq, idxd_desc);
> + if (ret) {
> + dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
> + goto err;
> + }
> +
> + ret = check_completion(dev, idxd_desc->iax_completion, false, false);
> + if (ret) {
> + dev_dbg(dev, "check_completion failed ret=%d\n", ret);
> + goto err;
> + }
> +
> + *dlen = idxd_desc->iax_completion->output_size;
> +
> + idxd_free_desc(wq, idxd_desc);
> +out:
> + return ret;
> +err:
> + idxd_free_desc(wq, idxd_desc);
> + dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);
> +
> + goto out;
> +}
> +
> +static int iaa_comp_acompress(struct acomp_req *req)
> +{
> + struct iaa_compression_ctx *compression_ctx;
> + struct crypto_tfm *tfm = req->base.tfm;
> + dma_addr_t src_addr, dst_addr;
> + int nr_sgs, cpu, ret = 0;
> + struct iaa_wq *iaa_wq;
> + u32 compression_crc;
> + struct idxd_wq *wq;
> + struct device *dev;
> +
> + compression_ctx = crypto_tfm_ctx(tfm);
> +
> + if (!iaa_crypto_enabled) {
> + pr_debug("iaa_crypto disabled, not compressing\n");
> + return -ENODEV;
> + }
> +
> + if (!req->src || !req->slen) {
> + pr_debug("invalid src, not compressing\n");
> + return -EINVAL;
> + }
> +
> + cpu = get_cpu();
> + wq = wq_table_next_wq(cpu);
> + put_cpu();
> + if (!wq) {
> + pr_debug("no wq configured for cpu=%d\n", cpu);
> + return -ENODEV;
> + }
> +
> + ret = iaa_wq_get(wq);
> + if (ret) {
> + pr_debug("no wq available for cpu=%d\n", cpu);
> + return -ENODEV;
> + }
> +
> + iaa_wq = idxd_wq_get_private(wq);
> +
> + if (!req->dst) {
> + gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
> + /* incompressible data will always be < 2 * slen */
> + req->dlen = 2 * req->slen;
2 * req->slen is only an estimate of the required dst buffer size. When
slen is greater than 2048 bytes, dlen exceeds 4096 bytes, i.e. more than
one page (for example, slen = 3000 gives dlen = 6000, which spans two
pages; see the illustration after the dst-mapping check below).
> + req->dst = sgl_alloc(req->dlen, flags, NULL);
> + if (!req->dst) {
> + ret = -ENOMEM;
> + goto out;
> + }
> + }
> +
> + dev = &wq->idxd->pdev->dev;
> +
> + nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
> + if (nr_sgs <= 0 || nr_sgs > 1) {
> + dev_dbg(dev, "couldn't map src sg for iaa device %d,"
> + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
> + iaa_wq->wq->id, ret);
> + ret = -EIO;
> + goto out;
> + }
> + src_addr = sg_dma_address(req->src);
> + dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
> + " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
> + req->src, req->slen, sg_dma_len(req->src));
> +
> + nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
> + if (nr_sgs <= 0 || nr_sgs > 1) {
When dlen is greater than 4096 bytes, nr_sgs may be greater than 1,
but the actual output size may still be less than 4096 bytes.
In other words, the nr_sgs > 1 condition may reject a request that
could otherwise have succeeded.
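For illustration, with made-up numbers and assuming sgl_alloc() keeps
backing the list with order-0 pages as it does today:

	/*
	 * Hypothetical example: slen = 3000, so the driver sets
	 * req->dlen = 2 * 3000 = 6000 before allocating req->dst.
	 */
	unsigned int nents;
	struct scatterlist *sgl = sgl_alloc(6000, GFP_KERNEL, &nents);

	/*
	 * sgl_alloc() allocates one page per entry, so nents == 2 here.
	 * Absent IOMMU segment merging, dma_map_sg() then also returns 2,
	 * and the nr_sgs > 1 test fails the request with -EIO even though
	 * the compressed output itself may be well under 4096 bytes.
	 */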
> + dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
> + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
> + iaa_wq->wq->id, ret);
> + ret = -EIO;
> + goto err_map_dst;
> + }
> + dst_addr = sg_dma_address(req->dst);
> + dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
> + " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
> + req->dst, req->dlen, sg_dma_len(req->dst));
> +
> + ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
> + &req->dlen, &compression_crc, false);
> + if (ret == -EINPROGRESS)
> + return ret;
> +
> + if (!ret && compression_ctx->verify_compress) {
> + dma_sync_sg_for_device(dev, req->dst, 1, DMA_FROM_DEVICE);
> + dma_sync_sg_for_device(dev, req->src, 1, DMA_TO_DEVICE);
> + ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
> + dst_addr, &req->dlen, compression_crc);
> + }
> +
> + if (ret)
> + dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
> +
> + dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
> +err_map_dst:
> + dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
> +out:
> + iaa_wq_put(wq);
> +
> + return ret;
> +}
> +
> +static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
> +{
> + gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
> + GFP_KERNEL : GFP_ATOMIC;
> + struct crypto_tfm *tfm = req->base.tfm;
> + dma_addr_t src_addr, dst_addr;
> + int nr_sgs, cpu, ret = 0;
> + struct iaa_wq *iaa_wq;
> + struct device *dev;
> + struct idxd_wq *wq;
> +
> + cpu = get_cpu();
> + wq = wq_table_next_wq(cpu);
> + put_cpu();
> + if (!wq) {
> + pr_debug("no wq configured for cpu=%d\n", cpu);
> + return -ENODEV;
> + }
> +
> + ret = iaa_wq_get(wq);
> + if (ret) {
> + pr_debug("no wq available for cpu=%d\n", cpu);
> + return -ENODEV;
> + }
> +
> + iaa_wq = idxd_wq_get_private(wq);
> +
> + dev = &wq->idxd->pdev->dev;
> +
> + nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
> + if (nr_sgs <= 0 || nr_sgs > 1) {
> + dev_dbg(dev, "couldn't map src sg for iaa device %d,"
> + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
> + iaa_wq->wq->id, ret);
> + ret = -EIO;
> + goto out;
> + }
> + src_addr = sg_dma_address(req->src);
> + dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
> + " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
> + req->src, req->slen, sg_dma_len(req->src));
> +
> + req->dlen = 4 * req->slen; /* start with ~avg comp rato */
4 * req->slen is only an estimate of the required dst buffer size. When
slen is greater than 1024 bytes, dlen exceeds 4096 bytes (for example,
slen = 1500 gives dlen = 6000, which again spans two pages).
> +alloc_dest:
> + req->dst = sgl_alloc(req->dlen, flags, NULL);
> + if (!req->dst) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
> + if (nr_sgs <= 0 || nr_sgs > 1) {
When dlen is greater than 4096 bytes, nr_sgs may be greater than 1,
which means data that was compressed by iaa_crypto cannot then be
decompressed by iaa_crypto through this path.
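One possible direction, only an untested sketch to illustrate the point
(sgl_alloc_order() and get_order() are existing helpers, but the
surrounding free paths would also need to change):

	/*
	 * Untested idea: back req->dst with a single higher-order
	 * allocation so that dma_map_sg() always yields one segment.
	 * The free paths (including .dst_free) would then need
	 * sgl_free_order() with the same order instead of sgl_free().
	 */
	unsigned int order = get_order(req->dlen);

	req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
	if (!req->dst) {
		ret = -ENOMEM;
		goto out;
	}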
> + dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
> + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
> + iaa_wq->wq->id, ret);
> + ret = -EIO;
> + goto err_map_dst;
> + }
> +
> + dst_addr = sg_dma_address(req->dst);
> + dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
> + " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
> + req->dst, req->dlen, sg_dma_len(req->dst));
> + ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
> + dst_addr, &req->dlen, true);
> + if (ret == -EOVERFLOW) {
> + dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
> + sgl_free(req->dst);
> + req->dlen *= 2;
> + if (req->dlen > CRYPTO_ACOMP_DST_MAX)
> + goto err_map_dst;
> + goto alloc_dest;
> + }
> +
> + if (ret != 0)
> + dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
> +
> + dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
> +err_map_dst:
> + dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
> +out:
> + iaa_wq_put(wq);
> +
> + return ret;
> +}
> +
> +static int iaa_comp_adecompress(struct acomp_req *req)
> +{
> + struct crypto_tfm *tfm = req->base.tfm;
> + dma_addr_t src_addr, dst_addr;
> + int nr_sgs, cpu, ret = 0;
> + struct iaa_wq *iaa_wq;
> + struct device *dev;
> + struct idxd_wq *wq;
> +
> + if (!iaa_crypto_enabled) {
> + pr_debug("iaa_crypto disabled, not decompressing\n");
> + return -ENODEV;
> + }
> +
> + if (!req->src || !req->slen) {
> + pr_debug("invalid src, not decompressing\n");
> + return -EINVAL;
> + }
> +
> + if (!req->dst)
> + return iaa_comp_adecompress_alloc_dest(req);
> +
> + cpu = get_cpu();
> + wq = wq_table_next_wq(cpu);
> + put_cpu();
> + if (!wq) {
> + pr_debug("no wq configured for cpu=%d\n", cpu);
> + return -ENODEV;
> + }
> +
> + ret = iaa_wq_get(wq);
> + if (ret) {
> + pr_debug("no wq available for cpu=%d\n", cpu);
> + return -ENODEV;
> + }
> +
> + iaa_wq = idxd_wq_get_private(wq);
> +
> + dev = &wq->idxd->pdev->dev;
> +
> + nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
> + if (nr_sgs <= 0 || nr_sgs > 1) {
> + dev_dbg(dev, "couldn't map src sg for iaa device %d,"
> + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
> + iaa_wq->wq->id, ret);
> + ret = -EIO;
> + goto out;
> + }
> + src_addr = sg_dma_address(req->src);
> + dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
> + " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
> + req->src, req->slen, sg_dma_len(req->src));
> +
> + nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
> + if (nr_sgs <= 0 || nr_sgs > 1) {
> + dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
> + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
> + iaa_wq->wq->id, ret);
> + ret = -EIO;
> + goto err_map_dst;
> + }
> + dst_addr = sg_dma_address(req->dst);
> + dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
> + " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
> + req->dst, req->dlen, sg_dma_len(req->dst));
> +
> + ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
> + dst_addr, &req->dlen, false);
> + if (ret == -EINPROGRESS)
> + return ret;
> +
> + if (ret != 0)
> + dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
> +
> + dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
> +err_map_dst:
> + dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
> +out:
> + iaa_wq_put(wq);
> +
> + return ret;
> +}
> +
> +static void compression_ctx_init(struct iaa_compression_ctx *ctx)
> +{
> + ctx->verify_compress = iaa_verify_compress;
> +}
> +
> +static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
> +{
> + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
> + struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
> +
> + compression_ctx_init(ctx);
> +
> + ctx->mode = IAA_MODE_FIXED;
> +
> + return 0;
> +}
> +
> +static struct acomp_alg iaa_acomp_fixed_deflate = {
> + .init = iaa_comp_init_fixed,
> + .compress = iaa_comp_acompress,
> + .decompress = iaa_comp_adecompress,
> + .dst_free = sgl_free,
> + .base = {
> + .cra_name = "deflate-iaa",
> + .cra_driver_name = "deflate_iaa",
> + .cra_ctxsize = sizeof(struct iaa_compression_ctx),
> + .cra_module = THIS_MODULE,
> + }
> +};
> +
> +static int iaa_register_compression_device(void)
> +{
> + int ret;
> +
> + ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
> + if (ret) {
> + pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
> + goto out;
> + }
> +
> + iaa_crypto_registered = true;
> +out:
> + return ret;
> +}
> +
> +static int iaa_unregister_compression_device(void)
> +{
> + if (iaa_crypto_registered)
> + crypto_unregister_acomp(&iaa_acomp_fixed_deflate);
> +
> + return 0;
> +}
> +
> static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
> {
> struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
> @@ -741,6 +1564,11 @@ static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
>
> mutex_lock(&wq->wq_lock);
>
> + if (idxd_wq_get_private(wq)) {
> + mutex_unlock(&wq->wq_lock);
> + return -EBUSY;
> + }
> +
> if (!idxd_wq_driver_name_match(wq, dev)) {
> dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
> idxd->id, wq->id, wq->driver_name, dev->driver->name);
> @@ -774,12 +1602,28 @@ static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
>
> rebalance_wq_table();
>
> + if (first_wq) {
> + iaa_crypto_enabled = true;
> + ret = iaa_register_compression_device();
> + if (ret != 0) {
> + iaa_crypto_enabled = false;
> + dev_dbg(dev, "IAA compression device registration failed\n");
> + goto err_register;
> + }
> + try_module_get(THIS_MODULE);
> +
> + pr_info("iaa_crypto now ENABLED\n");
> + }
> +
> mutex_unlock(&iaa_devices_lock);
> out:
> mutex_unlock(&wq->wq_lock);
>
> return ret;
>
> +err_register:
> + remove_iaa_wq(wq);
> + free_iaa_wq(idxd_wq_get_private(wq));
> err_save:
> if (first_wq)
> free_wq_table();
> @@ -795,6 +1639,9 @@ static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
> static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
> {
> struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
> + struct idxd_device *idxd = wq->idxd;
> + struct iaa_wq *iaa_wq;
> + bool free = false;
>
> idxd_wq_quiesce(wq);
>
> @@ -802,11 +1649,37 @@ static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
> mutex_lock(&iaa_devices_lock);
>
> remove_iaa_wq(wq);
> +
> + spin_lock(&idxd->dev_lock);
> + iaa_wq = idxd_wq_get_private(wq);
> + if (!iaa_wq) {
> + pr_err("%s: no iaa_wq available to remove\n", __func__);
> + return;
> + }
> +
> + if (iaa_wq->ref) {
> + iaa_wq->remove = true;
> + } else {
> + wq = iaa_wq->wq;
> + __free_iaa_wq(iaa_wq);
> + idxd_wq_set_private(wq, NULL);
> + free = true;
> + }
> + spin_unlock(&idxd->dev_lock);
> +
> + if (free)
> + kfree(iaa_wq);
> +
> drv_disable_wq(wq);
> rebalance_wq_table();
>
> - if (nr_iaa == 0)
> + if (nr_iaa == 0) {
> + iaa_crypto_enabled = false;
> free_wq_table();
> + module_put(THIS_MODULE);
> +
> + pr_info("iaa_crypto now DISABLED\n");
> + }
>
> mutex_unlock(&iaa_devices_lock);
> mutex_unlock(&wq->wq_lock);
> @@ -844,10 +1717,19 @@ static int __init iaa_crypto_init_module(void)
> goto err_driver_reg;
> }
>
> + ret = driver_create_file(&iaa_crypto_driver.drv,
> + &driver_attr_verify_compress);
> + if (ret) {
> + pr_debug("IAA verify_compress attr creation failed\n");
> + goto err_verify_attr_create;
> + }
> +
> pr_debug("initialized\n");
> out:
> return ret;
>
> +err_verify_attr_create:
> + idxd_driver_unregister(&iaa_crypto_driver);
> err_driver_reg:
> iaa_aecs_cleanup_fixed();
>
> @@ -856,6 +1738,11 @@ static int __init iaa_crypto_init_module(void)
>
> static void __exit iaa_crypto_cleanup_module(void)
> {
> + if (iaa_unregister_compression_device())
> + pr_debug("IAA compression device unregister failed\n");
> +
> + driver_remove_file(&iaa_crypto_driver.drv,
> + &driver_attr_verify_compress);
> idxd_driver_unregister(&iaa_crypto_driver);
> iaa_aecs_cleanup_fixed();
>
> --
> 2.34.1
>

Thanks.
Rex Zhang