Some platforms using fastrpc do not support DMA coherency on HLOS. On such systems, explicit cache maintenance is required to ensure data consistency for RPC argument buffers.
Add cache maintenance for argument buffers when operating on
non-coherent platforms:
- Flush input buffers before invoking RPC to ensure CPU writes are
  visible to the DSP
- Invalidate output buffers after RPC completion to ensure DSP writes
  are visible to the CPU

Introduce helper functions fastrpc_flush_args() and fastrpc_inv_args()
to perform the required dma-buf cache operations. These are invoked
only when the device is not marked as DMA coherent.

The coherency capability is determined using the "dma-coherent" device
tree property and stored per session context.

This ensures correct data synchronization on platforms lacking DMA
coherency, while avoiding unnecessary overhead on coherent systems.

Signed-off-by: Abhinav Parihar <[email protected]>
---
 drivers/misc/fastrpc.c | 76 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 1080f9acf70a..043b6a5548fb 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -255,6 +255,7 @@ struct fastrpc_session_ctx {
 	int sid;
 	bool used;
 	bool valid;
+	bool coherent;
 };
 
 struct fastrpc_soc_data {
@@ -973,6 +974,74 @@ static int fastrpc_create_maps(struct fastrpc_invoke_ctx *ctx)
 	return 0;
 }
 
+/*
+ * Perform a DMA_TO_DEVICE dma-buf CPU-access cycle on every mapped,
+ * non-empty input buffer of @ctx so that CPU writes reach the DSP.
+ * The caller invokes this only when the session is not DMA coherent.
+ */
+static void fastrpc_flush_args(struct fastrpc_invoke_ctx *ctx)
+{
+	union fastrpc_remote_arg *rpra = ctx->rpra;
+	int i, inbufs, outbufs;
+
+	inbufs = REMOTE_SCALARS_INBUFS(ctx->sc);
+	outbufs = REMOTE_SCALARS_OUTBUFS(ctx->sc);
+
+	for (i = 0; i < inbufs + outbufs; ++i) {
+		int raix = ctx->olaps[i].raix;
+		struct fastrpc_map *map = ctx->maps[raix];
+
+		if (raix + 1 > inbufs)
+			continue;
+		if (!map || !map->buf)
+			continue;
+
+		if (rpra[raix].buf.len && ctx->olaps[i].mstart) {
+			dma_buf_begin_cpu_access(map->buf, DMA_TO_DEVICE);
+			dma_buf_end_cpu_access(map->buf, DMA_TO_DEVICE);
+		}
+	}
+}
+
+/*
+ * Perform a DMA_FROM_DEVICE dma-buf CPU-access cycle on every mapped,
+ * non-empty output buffer of @ctx so that DSP writes reach the CPU.
+ * The caller invokes this only when the session is not DMA coherent.
+ */
+static void fastrpc_inv_args(struct fastrpc_invoke_ctx *ctx)
+{
+	union fastrpc_remote_arg *rpra = ctx->rpra;
+	int i, inbufs, outbufs;
+
+	inbufs = REMOTE_SCALARS_INBUFS(ctx->sc);
+	outbufs = REMOTE_SCALARS_OUTBUFS(ctx->sc);
+
+	for (i = 0; i < inbufs + outbufs; ++i) {
+		int raix = ctx->olaps[i].raix;
+		struct fastrpc_map *map = ctx->maps[raix];
+
+		if (raix + 1 <= inbufs)
+			continue;
+		if (!rpra[raix].buf.len)
+			continue;
+		if (!map || !map->buf)
+			continue;
+
+		/*
+		 * Skip invalidation if the argument overlaps with the
+		 * RPC control header page.
+		 */
+		if (((uintptr_t)rpra & PAGE_MASK) ==
+		    ((uintptr_t)rpra[raix].buf.pv & PAGE_MASK))
+			continue;
+
+		if (ctx->olaps[i].mstart) {
+			dma_buf_begin_cpu_access(map->buf, DMA_FROM_DEVICE);
+			dma_buf_end_cpu_access(map->buf, DMA_FROM_DEVICE);
+		}
+	}
+}
+
 static struct fastrpc_invoke_buf *fastrpc_invoke_buf_start(union fastrpc_remote_arg *pra, int len)
 {
 	return (struct fastrpc_invoke_buf *)(&pra[len]);
@@ -1093,6 +1162,9 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
 		}
 	}
 
+	if (!ctx->fl->sctx->coherent)
+		fastrpc_flush_args(ctx);
+
 	for (i = ctx->nbufs; i < ctx->nscalars; ++i) {
 		list[i].num = ctx->args[i].length ? 1 : 0;
 		list[i].pgidx = i;
@@ -1239,6 +1311,9 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel,
 	/* make sure that all memory writes by DSP are seen by CPU */
 	dma_rmb();
 
+	if (!fl->sctx->coherent)
+		fastrpc_inv_args(ctx);
+
 	/* populate all the output buffers with results */
 	err = fastrpc_put_args(ctx, kernel);
 	if (err)
@@ -2217,6 +2292,7 @@ static int fastrpc_cb_probe(struct platform_device *pdev)
 	sess->used = false;
 	sess->valid = true;
 	sess->dev = dev;
+	sess->coherent = of_property_read_bool(dev->of_node, "dma-coherent");
 	dev_set_drvdata(dev, sess);
 
 	if (cctx->domain_id == CDSP_DOMAIN_ID)
-- 
2.34.1
