[RFC][PATCH 2/2] crypto: caam - handle core endianness != caam endianness

From: Horia Geantă
Date: Fri Aug 28 2015 - 07:50:45 EST


There are SoCs like LS1043A where CAAM endianness (BE) does not match
the endianness of the core (LE).
Moreover, the driver is required to handle configurations such as
CPU_BIG_ENDIAN=y on ARM-based SoCs.
This calls for a complete rewrite of the I/O accessors.

PPC-specific accessors - {in,out}_{le,be}XX - are replaced with the
generic io{read,write}[be]XX accessors where possible (there is no
generic 64-bit I/O accessor).

Signed-off-by: Horia Geantă <horia.geanta@xxxxxxxxxxxxx>
Signed-off-by: Alex Porosanu <alexandru.porosanu@xxxxxxxxxxxxx>
---

While the patch takes the S/G format (struct sec4_sg_entry) used on i.MX7
into consideration, I don't have a board, so I haven't tested on that
platform.
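
As a quick illustration of the byte-order handling for the LS1043A case
(LE core, BE CAAM, i.e. CONFIG_CRYPTO_DEV_FSL_CAAM_LE not set), the new
cpu_to_caam32()/caam32_to_cpu() helpers boil down to plain 32-bit byte
swaps. A rough userspace sketch, with htobe32()/be32toh() standing in for
the helpers and a made-up descriptor header word (illustration only, not
kernel code):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <endian.h>

int main(void)
{
        /* made-up 32-bit descriptor header word, as the CPU sees it */
        uint32_t hdr = 0xb0800000u | 5;

        /* what cpu_to_caam32(hdr) would place in the descriptor buffer */
        uint32_t wire = htobe32(hdr);

        /* caam32_to_cpu() on the read-back path recovers the CPU view */
        uint32_t back = be32toh(wire);

        printf("cpu %08" PRIx32 " -> caam %08" PRIx32 " -> cpu %08" PRIx32 "\n",
               hdr, wire, back);
        return 0;
}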

 drivers/crypto/caam/caamhash.c    |  5 +--
 drivers/crypto/caam/ctrl.c        |  2 +-
 drivers/crypto/caam/desc.h        |  9 ++++-
 drivers/crypto/caam/desc_constr.h | 42 +++++++++++++++--------
 drivers/crypto/caam/jr.c          |  8 ++---
 drivers/crypto/caam/regs.h        | 72 ++++++++++++++++++++++++++++-----------
 drivers/crypto/caam/sg_sw_sec4.h  | 10 +++---
 7 files changed, 102 insertions(+), 46 deletions(-)
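
The i.MX7 S/G handling mentioned above goes through the new wr_dma()/rd_dma()
helpers added to regs.h: for CONFIG_SOC_IMX7D the 64-bit pointer is built from
its two 32-bit halves stored in reversed order, each half additionally run
through cpu_to_caam32(). A rough userspace sketch of just the half-swap part
(lo32()/hi32()/imx7_wr_dma() are made-up stand-ins, the address is arbitrary,
and the per-half byte-order conversion is left out):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

/* made-up stand-ins for the kernel's lower_32_bits()/upper_32_bits() */
#define lo32(x) ((uint32_t)((x) & 0xffffffffu))
#define hi32(x) ((uint32_t)((x) >> 32))

/* Mirrors the shape of the i.MX7D wr_dma() macro below, minus the
 * cpu_to_caam32() conversion of each half: the low 32 bits land in the
 * upper word and the high 32 bits in the lower word. */
static uint64_t imx7_wr_dma(uint64_t dma)
{
        return ((uint64_t)lo32(dma) << 32) | (uint64_t)hi32(dma);
}

int main(void)
{
        uint64_t dma = 0x00000001a2b3c4d0ull;   /* arbitrary DMA address */

        printf("cpu view %016" PRIx64 " -> stored %016" PRIx64 "\n",
               dma, imx7_wr_dma(dma));
        return 0;
}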

diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 72acf8e5ac2f..c39332171fa3 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -841,7 +841,7 @@ static int ahash_update_ctx(struct ahash_request *req)
*next_buflen, 0);
} else {
(edesc->sec4_sg + sec4_sg_src_index - 1)->len |=
- SEC4_SG_LEN_FIN;
+ cpu_to_caam32(SEC4_SG_LEN_FIN);
}

state->current_buf = !state->current_buf;
@@ -942,7 +942,8 @@ static int ahash_final_ctx(struct ahash_request *req)
state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1,
buf, state->buf_dma, buflen,
last_buflen);
- (edesc->sec4_sg + sec4_sg_bytes - 1)->len |= SEC4_SG_LEN_FIN;
+ (edesc->sec4_sg + sec4_sg_bytes - 1)->len |=
+ cpu_to_caam32(SEC4_SG_LEN_FIN);

edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
sec4_sg_bytes, DMA_TO_DEVICE);
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 8abb4bc548cc..e527d1e0b9ab 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -128,7 +128,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc,
}

for (i = 0; i < desc_len(desc); i++)
- wr_reg32(&deco->descbuf[i], *(desc + i));
+ wr_reg32(&deco->descbuf[i], caam32_to_cpu(*(desc + i)));

flags = DECO_JQCR_WHL;
/*
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index 983d663ef671..fc257b343d3b 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -23,16 +23,23 @@
#define SEC4_SG_OFFS_MASK 0x00001fff

struct sec4_sg_entry {
-#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
+#if !defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && \
+ defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX)
u32 rsvd1;
dma_addr_t ptr;
#else
u64 ptr;
#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_IMX */
u32 len;
+#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_LE
u8 rsvd2;
u8 buf_pool_id;
u16 offset;
+#else
+ u16 offset;
+ u8 buf_pool_id;
+ u8 rsvd2;
+#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_LE */
};

/* Max size of any CAAM descriptor in 32-bit words, inclusive of header */
diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h
index 98d07de24fc4..40baf4752fbc 100644
--- a/drivers/crypto/caam/desc_constr.h
+++ b/drivers/crypto/caam/desc_constr.h
@@ -5,6 +5,7 @@
*/

#include "desc.h"
+#include "regs.h"

#define IMMEDIATE (1 << 23)
#define CAAM_CMD_SZ sizeof(u32)
@@ -32,7 +33,7 @@

static inline int desc_len(u32 *desc)
{
- return *desc & HDR_DESCLEN_MASK;
+ return caam32_to_cpu(*desc) & HDR_DESCLEN_MASK;
}

static inline int desc_bytes(void *desc)
@@ -52,7 +53,7 @@ static inline void *sh_desc_pdb(u32 *desc)

static inline void init_desc(u32 *desc, u32 options)
{
- *desc = (options | HDR_ONE) + 1;
+ *desc = cpu_to_caam32((options | HDR_ONE) + 1);
}

static inline void init_sh_desc(u32 *desc, u32 options)
@@ -78,9 +79,10 @@ static inline void append_ptr(u32 *desc, dma_addr_t ptr)
{
dma_addr_t *offset = (dma_addr_t *)desc_end(desc);

- *offset = ptr;
+ *offset = wr_dma(ptr);

- (*desc) += CAAM_PTR_SZ / CAAM_CMD_SZ;
+ (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) +
+ CAAM_PTR_SZ / CAAM_CMD_SZ);
}

static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len,
@@ -99,16 +101,17 @@ static inline void append_data(u32 *desc, void *data, int len)
if (len) /* avoid sparse warning: memcpy with byte count of 0 */
memcpy(offset, data, len);

- (*desc) += (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ;
+ (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) +
+ (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ);
}

static inline void append_cmd(u32 *desc, u32 command)
{
u32 *cmd = desc_end(desc);

- *cmd = command;
+ *cmd = cpu_to_caam32(command);

- (*desc)++;
+ (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 1);
}

#define append_u32 append_cmd
@@ -117,16 +120,22 @@ static inline void append_u64(u32 *desc, u64 data)
{
u32 *offset = desc_end(desc);

- *offset = upper_32_bits(data);
- *(++offset) = lower_32_bits(data);
+ /* Only 32-bit alignment is guaranteed in descriptor buffer */
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_FSL_CAAM_LE)) {
+ *offset = cpu_to_caam32(lower_32_bits(data));
+ *(++offset) = cpu_to_caam32(upper_32_bits(data));
+ } else {
+ *offset = cpu_to_caam32(upper_32_bits(data));
+ *(++offset) = cpu_to_caam32(lower_32_bits(data));
+ }

- (*desc) += 2;
+ (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 2);
}

/* Write command without affecting header, and return pointer to next word */
static inline u32 *write_cmd(u32 *desc, u32 command)
{
- *desc = command;
+ *desc = cpu_to_caam32(command);

return desc + 1;
}
@@ -168,14 +177,17 @@ APPEND_CMD_RET(move, MOVE)

static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd)
{
- *jump_cmd = *jump_cmd | (desc_len(desc) - (jump_cmd - desc));
+ *jump_cmd = cpu_to_caam32(caam32_to_cpu(*jump_cmd) |
+ (desc_len(desc) - (jump_cmd - desc)));
}

static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd)
{
- *move_cmd &= ~MOVE_OFFSET_MASK;
- *move_cmd = *move_cmd | ((desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) &
- MOVE_OFFSET_MASK);
+ u32 val = caam32_to_cpu(*move_cmd);
+
+ val &= ~MOVE_OFFSET_MASK;
+ val |= (desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & MOVE_OFFSET_MASK;
+ *move_cmd = cpu_to_caam32(val);
}

#define APPEND_CMD(cmd, op) \
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index f7e0d8d4c3da..5add26b2a2ff 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -182,7 +182,7 @@ static void caam_jr_dequeue(unsigned long devarg)
sw_idx = (tail + i) & (JOBR_DEPTH - 1);

if (jrp->outring[hw_idx].desc ==
- jrp->entinfo[sw_idx].desc_addr_dma)
+ rd_dma(jrp->entinfo[sw_idx].desc_addr_dma))
break; /* found */
}
/* we should never fail to find a matching descriptor */
@@ -200,7 +200,7 @@ static void caam_jr_dequeue(unsigned long devarg)
usercall = jrp->entinfo[sw_idx].callbk;
userarg = jrp->entinfo[sw_idx].cbkarg;
userdesc = jrp->entinfo[sw_idx].desc_addr_virt;
- userstatus = jrp->outring[hw_idx].jrstatus;
+ userstatus = caam32_to_cpu(jrp->outring[hw_idx].jrstatus);

/*
* Make sure all information from the job has been obtained
@@ -330,7 +330,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
int head, tail, desc_size;
dma_addr_t desc_dma;

- desc_size = (*desc & HDR_JD_LENGTH_MASK) * sizeof(u32);
+ desc_size = (caam32_to_cpu(*desc) & HDR_JD_LENGTH_MASK) * sizeof(u32);
desc_dma = dma_map_single(dev, desc, desc_size, DMA_TO_DEVICE);
if (dma_mapping_error(dev, desc_dma)) {
dev_err(dev, "caam_jr_enqueue(): can't map jobdesc\n");
@@ -356,7 +356,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
head_entry->cbkarg = areq;
head_entry->desc_addr_dma = desc_dma;

- jrp->inpring[jrp->inp_ring_write_index] = desc_dma;
+ jrp->inpring[jrp->inp_ring_write_index] = wr_dma(desc_dma);

/*
* Guarantee that the descriptor's DMA address has been written to
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index a8a79975682f..d3e93e066cd8 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -65,7 +65,7 @@
*
*/

-#ifdef CONFIG_ARM
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
/* These are common macros for Power, put here for ARM */
#define setbits32(_addr, _v) writel((readl(_addr) | (_v)), (_addr))
#define clrbits32(_addr, _v) writel((readl(_addr) & ~(_v)), (_addr))
@@ -86,26 +86,62 @@
#define clrsetbits_le32(addr, clear, set) clrsetbits(le32, addr, clear, set)
#endif

-#ifdef __BIG_ENDIAN
-#define wr_reg32(reg, data) out_be32(reg, data)
-#define rd_reg32(reg) in_be32(reg)
+#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_LE
+#define caam16_to_cpu(value) le16_to_cpu(value)
+#define cpu_to_caam16(value) cpu_to_le16(value)
+#define caam32_to_cpu(value) le32_to_cpu(value)
+#define cpu_to_caam32(value) cpu_to_le32(value)
+#define caam64_to_cpu(value) le64_to_cpu(value)
+#define cpu_to_caam64(value) cpu_to_le64(value)
+#define wr_reg32(reg, data) iowrite32(data, reg)
+#define rd_reg32(reg) ioread32(reg)
+#define clrsetbits_32(addr, clear, set) clrsetbits_le32(addr, clear, set)
+#else
+#define caam16_to_cpu(value) be16_to_cpu(value)
+#define cpu_to_caam16(value) cpu_to_be16(value)
+#define caam32_to_cpu(value) be32_to_cpu(value)
+#define cpu_to_caam32(value) cpu_to_be32(value)
+#define caam64_to_cpu(value) be64_to_cpu(value)
+#define cpu_to_caam64(value) cpu_to_be64(value)
+#define wr_reg32(reg, data) iowrite32be(data, reg)
+#define rd_reg32(reg) ioread32be(reg)
#define clrsetbits_32(addr, clear, set) clrsetbits_be32(addr, clear, set)
-#ifdef CONFIG_64BIT
-#define wr_reg64(reg, data) out_be64(reg, data)
-#define rd_reg64(reg) in_be64(reg)
#endif
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+#ifdef CONFIG_SOC_IMX7D
+#define wr_dma(value) (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
+ (u64)cpu_to_caam32(upper_32_bits(value)))
+#define rd_dma(value) (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \
+ (u64)caam32_to_cpu(upper_32_bits(value)))
#else
-#ifdef __LITTLE_ENDIAN
-#define wr_reg32(reg, data) __raw_writel(data, reg)
-#define rd_reg32(reg) __raw_readl(reg)
-#define clrsetbits_32(addr, clear, set) clrsetbits_le32(addr, clear, set)
-#ifdef CONFIG_64BIT
-#define wr_reg64(reg, data) __raw_writeq(data, reg)
-#define rd_reg64(reg) __raw_readq(reg)
-#endif
-#endif
+#define wr_dma(value) cpu_to_caam64(value)
+#define rd_dma(value) caam64_to_cpu(value)
+#endif /* CONFIG_SOC_IMX7D */
+#else
+#define wr_dma(value) cpu_to_caam32(value)
+#define rd_dma(value) caam32_to_cpu(value)
#endif

+#ifdef CONFIG_64BIT
+#ifdef CONFIG_PPC
+#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_LE
+#define wr_reg64(reg, data) out_le64(reg, data)
+#define rd_reg64(reg) in_le64(reg)
+#else
+#define wr_reg64(reg, data) out_be64(reg, data)
+#define rd_reg64(reg) in_be64(reg)
+#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_LE */
+#else /* CONFIG_PPC */
+#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_LE
+#define wr_reg64(reg, data) writeq(data, reg)
+#define rd_reg64(reg) readq(reg)
+#else
+#define wr_reg64(reg, data) iowrite64be(data, reg)
+#define rd_reg64(reg) ioread64be(reg)
+#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_LE */
+#endif /* CONFIG_PPC */
+#else /* CONFIG_64BIT */
/*
* The only users of these wr/rd_reg64 functions is the Job Ring (JR).
* The DMA address registers in the JR are handled differently depending on
@@ -123,8 +159,6 @@
* base + 0x0000 : least-significant 32 bits
* base + 0x0004 : most-significant 32 bits
*/
-
-#ifndef CONFIG_64BIT
#if !defined(CONFIG_CRYPTO_DEV_FSL_CAAM_LE) || \
defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX)
#define REG64_MS32(reg) ((u32 __iomem *)(reg))
@@ -145,7 +179,7 @@ static inline u64 rd_reg64(u64 __iomem *reg)
return ((u64)rd_reg32(REG64_MS32(reg)) << 32 |
(u64)rd_reg32(REG64_LS32(reg)));
}
-#endif
+#endif /* CONFIG_64BIT */

/*
* jr_outentry
diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h
index 18cd6d1f5870..fb8e0084e3e5 100644
--- a/drivers/crypto/caam/sg_sw_sec4.h
+++ b/drivers/crypto/caam/sg_sw_sec4.h
@@ -5,6 +5,8 @@
*
*/

+#include "regs.h"
+
struct sec4_sg_entry;

/*
@@ -13,10 +15,10 @@ struct sec4_sg_entry;
static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
dma_addr_t dma, u32 len, u32 offset)
{
- sec4_sg_ptr->ptr = dma;
- sec4_sg_ptr->len = len;
+ sec4_sg_ptr->ptr = wr_dma(dma);
+ sec4_sg_ptr->len = cpu_to_caam32(len);
sec4_sg_ptr->buf_pool_id = 0;
- sec4_sg_ptr->offset = offset;
+ sec4_sg_ptr->offset = cpu_to_caam16(offset & SEC4_SG_OFFS_MASK);
#ifdef DEBUG
print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ",
DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr,
@@ -51,7 +53,7 @@ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count,
u32 offset)
{
sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset);
- sec4_sg_ptr->len |= SEC4_SG_LEN_FIN;
+ sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
}

static inline struct sec4_sg_entry *sg_to_sec4_sg_len(
--
2.4.4
