Freescale i.MX6 ARM platforms do not support hardware cache coherency.  This
patch adds the explicit cache maintenance (DMA sync) operations and memory
barriers the CAAM driver needs to operate correctly on these non-coherent
platforms.

Signed-off-by: Victoria Milhoan <vicki.milh...@freescale.com>
---
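Note for reviewers: the hunks below follow the standard streaming-DMA
ownership model. As a minimal, self-contained sketch of that pattern --
the device pointer, buffer, and job-submission step here are
hypothetical placeholders, not CAAM code:

#include <linux/dma-mapping.h>

/* Sketch: let a non-coherent device DMA into buf, then read it safely */
static int read_device_result(struct device *dev, void *buf, size_t len)
{
	dma_addr_t dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);

	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* ... submit the job and wait for the device to finish ... */

	/* Invalidate stale cache lines before the CPU reads the result */
	dma_sync_single_for_cpu(dev, dma, len, DMA_FROM_DEVICE);
	dma_unmap_single(dev, dma, len, DMA_FROM_DEVICE);

	return 0;
}
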
 drivers/crypto/caam/caamhash.c   | 28 +++++++++++++++++----------
 drivers/crypto/caam/caamrng.c    | 10 +++++++++-
 drivers/crypto/caam/jr.c         | 42 +++++++++++++++++++++++++++++++++++++++-
 drivers/crypto/caam/key_gen.c    |  4 +++-
 drivers/crypto/caam/sg_sw_sec4.h |  1 +
 5 files changed, 72 insertions(+), 13 deletions(-)

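The jr.c hunks additionally order the ring update against the doorbell
write. A hedged sketch of that producer-side pattern -- ring_submit, the
doorbell pointer, and the register layout are hypothetical, not the CAAM
register map; writel() already implies a barrier on many architectures,
so the explicit wmb() simply mirrors the patch's defensive ordering:

#include <linux/io.h>
#include <asm/barrier.h>

/* Sketch: publish a ring entry before notifying the device */
static void ring_submit(void __iomem *doorbell, dma_addr_t *ring,
			int idx, dma_addr_t desc_dma)
{
	ring[idx] = desc_dma;	/* write the new job entry */
	wmb();			/* order the entry vs. the MMIO write below */
	writel(1, doorbell);	/* tell the device one job was added */
}
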
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index ba0532e..1662c65 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -500,6 +500,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
 #endif
        }
        dma_unmap_single(jrdev, src_dma, *keylen, DMA_TO_DEVICE);
+       dma_sync_single_for_cpu(jrdev, dst_dma, digestsize, DMA_FROM_DEVICE);
        dma_unmap_single(jrdev, dst_dma, digestsize, DMA_FROM_DEVICE);
 
        *keylen = digestsize;
@@ -608,8 +609,11 @@ static inline void ahash_unmap(struct device *dev,
        if (edesc->src_nents)
                dma_unmap_sg_chained(dev, req->src, edesc->src_nents,
                                     DMA_TO_DEVICE, edesc->chained);
-       if (edesc->dst_dma)
+       if (edesc->dst_dma) {
+               dma_sync_single_for_cpu(dev, edesc->dst_dma, dst_len,
+                                       DMA_FROM_DEVICE);
                dma_unmap_single(dev, edesc->dst_dma, dst_len, DMA_FROM_DEVICE);
+       }
 
        if (edesc->sec4_sg_bytes)
                dma_unmap_single(dev, edesc->sec4_sg_dma,
@@ -624,8 +628,12 @@ static inline void ahash_unmap_ctx(struct device *dev,
        struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
        struct caam_hash_state *state = ahash_request_ctx(req);
 
-       if (state->ctx_dma)
+       if (state->ctx_dma) {
+               if ((flag == DMA_FROM_DEVICE) || (flag == DMA_BIDIRECTIONAL))
+                       dma_sync_single_for_cpu(dev, state->ctx_dma,
+                                               ctx->ctx_len, flag);
                dma_unmap_single(dev, state->ctx_dma, ctx->ctx_len, flag);
+       }
        ahash_unmap(dev, edesc, req, dst_len);
 }
 
@@ -807,7 +815,7 @@ static int ahash_update_ctx(struct ahash_request *req)
                 * allocate space for base edesc and hw desc commands,
                 * link tables
                 */
-               edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+               edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
                                sec4_sg_bytes, GFP_DMA | flags);
                if (!edesc) {
                        dev_err(jrdev,
@@ -918,7 +926,7 @@ static int ahash_final_ctx(struct ahash_request *req)
        sec4_sg_bytes = (1 + (buflen ? 1 : 0)) * sizeof(struct sec4_sg_entry);
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+       edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
                        sec4_sg_bytes, GFP_DMA | flags);
        if (!edesc) {
                dev_err(jrdev, "could not allocate extended descriptor\n");
@@ -1005,7 +1013,7 @@ static int ahash_finup_ctx(struct ahash_request *req)
                         sizeof(struct sec4_sg_entry);
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+       edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
                        sec4_sg_bytes, GFP_DMA | flags);
        if (!edesc) {
                dev_err(jrdev, "could not allocate extended descriptor\n");
@@ -1091,7 +1099,7 @@ static int ahash_digest(struct ahash_request *req)
        sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry);
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kmalloc(sizeof(struct ahash_edesc) + sec4_sg_bytes +
+       edesc = kzalloc(sizeof(struct ahash_edesc) + sec4_sg_bytes +
                        DESC_JOB_IO_LEN, GFP_DMA | flags);
        if (!edesc) {
                dev_err(jrdev, "could not allocate extended descriptor\n");
@@ -1165,7 +1173,7 @@ static int ahash_final_no_ctx(struct ahash_request *req)
        int sh_len;
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN,
+       edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN,
                        GFP_DMA | flags);
        if (!edesc) {
                dev_err(jrdev, "could not allocate extended descriptor\n");
@@ -1245,7 +1253,7 @@ static int ahash_update_no_ctx(struct ahash_request *req)
                 * allocate space for base edesc and hw desc commands,
                 * link tables
                 */
-               edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+               edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
                                sec4_sg_bytes, GFP_DMA | flags);
                if (!edesc) {
                        dev_err(jrdev,
@@ -1352,7 +1360,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
                         sizeof(struct sec4_sg_entry);
 
        /* allocate space for base edesc and hw desc commands, link tables */
-       edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+       edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
                        sec4_sg_bytes, GFP_DMA | flags);
        if (!edesc) {
                dev_err(jrdev, "could not allocate extended descriptor\n");
@@ -1447,7 +1455,7 @@ static int ahash_update_first(struct ahash_request *req)
                 * allocate space for base edesc and hw desc commands,
                 * link tables
                 */
-               edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
+               edesc = kzalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN +
                                sec4_sg_bytes, GFP_DMA | flags);
                if (!edesc) {
                        dev_err(jrdev,
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 26a544b..a8c4af9 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -80,9 +80,12 @@ static struct caam_rng_ctx *rng_ctx;
 
 static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd)
 {
-       if (bd->addr)
+       if (bd->addr) {
+               dma_sync_single_for_cpu(jrdev, bd->addr, RN_BUF_SIZE,
+                                       DMA_FROM_DEVICE);
                dma_unmap_single(jrdev, bd->addr, RN_BUF_SIZE,
                                 DMA_FROM_DEVICE);
+       }
 }
 
 static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx)
@@ -108,6 +111,10 @@ static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context)
 
        atomic_set(&bd->empty, BUF_NOT_EMPTY);
        complete(&bd->filled);
+
+       /* Buffer refilled, invalidate cache */
+       dma_sync_single_for_cpu(jrdev, bd->addr, RN_BUF_SIZE, DMA_FROM_DEVICE);
+
 #ifdef DEBUG
        print_hex_dump(KERN_ERR, "rng refreshed buf@: ",
                       DUMP_PREFIX_ADDRESS, 16, 4, bd->buf, RN_BUF_SIZE, 1);
@@ -211,6 +218,7 @@ static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx)
        print_hex_dump(KERN_ERR, "rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
                       desc, desc_bytes(desc), 1);
 #endif
+
        return 0;
 }
 
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index b8b5d47..a693bf7 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -168,6 +168,9 @@ static void caam_jr_dequeue(unsigned long devarg)
        void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg);
        u32 *userdesc, userstatus;
        void *userarg;
+       dma_addr_t outbusaddr;
+
+       outbusaddr = rd_reg64(&jrp->rregs->outring_base);
 
        while (rd_reg32(&jrp->rregs->outring_used)) {
 
@@ -177,10 +180,19 @@ static void caam_jr_dequeue(unsigned long devarg)
 
                sw_idx = tail = jrp->tail;
                hw_idx = jrp->out_ring_read_index;
+               dma_sync_single_for_cpu(dev, outbusaddr,
+                                       sizeof(struct jr_outentry) * JOBR_DEPTH,
+                                       DMA_FROM_DEVICE);
 
                for (i = 0; CIRC_CNT(head, tail + i, JOBR_DEPTH) >= 1; i++) {
                        sw_idx = (tail + i) & (JOBR_DEPTH - 1);
 
+                       /*
+                        * Ensure that tail is read before using it as part of
+                        * the index into the software ring.
+                        */
+                       smp_read_barrier_depends();
+
                        if (jrp->outring[hw_idx].desc ==
                            jrp->entinfo[sw_idx].desc_addr_dma)
                                break; /* found */
@@ -202,6 +214,13 @@ static void caam_jr_dequeue(unsigned long devarg)
                userdesc = jrp->entinfo[sw_idx].desc_addr_virt;
                userstatus = jrp->outring[hw_idx].jrstatus;
 
+               /*
+                * Make sure all information from the job has been obtained
+                * before telling CAAM that the job has been removed from the
+                * output ring.
+                */
+               smp_mb();
+
                /* set done */
                wr_reg32(&jrp->rregs->outring_rmvd, 1);
 
@@ -216,6 +235,12 @@ static void caam_jr_dequeue(unsigned long devarg)
                if (sw_idx == tail) {
                        do {
                                tail = (tail + 1) & (JOBR_DEPTH - 1);
+
+                               /*
+                                * Ensure that tail is read before using it to
+                                * update the software ring's tail index
+                                */
+                               smp_read_barrier_depends();
                        } while (CIRC_CNT(head, tail, JOBR_DEPTH) >= 1 &&
                                 jrp->entinfo[tail].desc_addr_dma == 0);
 
@@ -321,7 +346,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
        struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
        struct caam_jrentry_info *head_entry;
        int head, tail, desc_size;
-       dma_addr_t desc_dma;
+       dma_addr_t desc_dma, inpbusaddr;
 
        desc_size = (*desc & HDR_JD_LENGTH_MASK) * sizeof(u32);
        desc_dma = dma_map_single(dev, desc, desc_size, DMA_TO_DEVICE);
@@ -330,6 +355,11 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
                return -EIO;
        }
 
+       inpbusaddr = rd_reg64(&jrp->rregs->inpring_base);
+       dma_sync_single_for_device(dev, inpbusaddr,
+                                       sizeof(dma_addr_t) * JOBR_DEPTH,
+                                       DMA_TO_DEVICE);
+
        spin_lock_bh(&jrp->inplock);
 
        head = jrp->head;
@@ -351,12 +381,22 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
 
        jrp->inpring[jrp->inp_ring_write_index] = desc_dma;
 
+       dma_sync_single_for_device(dev, inpbusaddr,
+                                       sizeof(dma_addr_t) * JOBR_DEPTH,
+                                       DMA_TO_DEVICE);
+
        smp_wmb();
 
        jrp->inp_ring_write_index = (jrp->inp_ring_write_index + 1) &
                                    (JOBR_DEPTH - 1);
        jrp->head = (head + 1) & (JOBR_DEPTH - 1);
 
+       /*
+        * Ensure that all job information has been written before
+        * notifying CAAM that a new job was added to the input ring.
+        */
+       wmb();
+
        wr_reg32(&jrp->rregs->inpring_jobadd, 1);
 
        spin_unlock_bh(&jrp->inplock);
diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c
index e1eaf4f..6481f71 100644
--- a/drivers/crypto/caam/key_gen.c
+++ b/drivers/crypto/caam/key_gen.c
@@ -71,6 +71,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
        }
 
        init_job_desc(desc, 0);
+
        append_key(desc, dma_addr_in, keylen, CLASS_2 | KEY_DEST_CLASS_REG);
 
        /* Sets MDHA up into an HMAC-INIT */
@@ -111,7 +112,8 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
                               split_key_pad_len, 1);
 #endif
        }
-
+       dma_sync_single_for_cpu(jrdev, dma_addr_out, split_key_pad_len,
+                               DMA_FROM_DEVICE);
        dma_unmap_single(jrdev, dma_addr_out, split_key_pad_len,
                         DMA_FROM_DEVICE);
 out_unmap_in:
diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h
index 3b91821..6365585 100644
--- a/drivers/crypto/caam/sg_sw_sec4.h
+++ b/drivers/crypto/caam/sg_sw_sec4.h
@@ -98,6 +98,7 @@ static int dma_map_sg_chained(struct device *dev, struct scatterlist *sg,
        } else {
                dma_map_sg(dev, sg, nents, dir);
        }
+
        return nents;
 }
 
-- 
2.1.4
