From: Vineet Gupta <vgu...@kernel.org>

Signed-off-by: Vineet Gupta <vgu...@kernel.org>
---
 arch/arc/lib/memset-archs.S | 112 ++++++++++++++----------------------
 1 file changed, 43 insertions(+), 69 deletions(-)

diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
index 330e22f7cf3c..a9a0ccef761d 100644
--- a/arch/arc/lib/memset-archs.S
+++ b/arch/arc/lib/memset-archs.S
@@ -5,6 +5,7 @@
 
 #include <linux/linkage.h>
 #include <asm/cache.h>
+#include <asm/assembler.h>
 
 /*
  * The memset implementation below is optimized to use prefetchw and prealloc
@@ -55,7 +56,7 @@ ENTRY_CFI(memset)
 1:
 #endif
 
-;;; Destination is aligned
+       ; promote memset pattern from char to int (held in both r4 and r5, for STD)
        and     r1, r1, 0xFF
        asl     r4, r1, 8
        or      r4, r4, r1
@@ -63,75 +64,48 @@ ENTRY_CFI(memset)
        or      r5, r5, r4
        mov     r4, r5
 
-       sub3    lp_count, r2, 8
-       cmp     r2, 64
-       bmsk.hi r2, r2, 5
-       mov.ls  lp_count, 0
-       add3.hi r2, r2, 8
-
-;;; Convert len to Dwords, unfold x8
-       lsr.f   lp_count, lp_count, 6
-
-       lpnz    @.Lset64bytes
-       ;; LOOP START
-       PREALLOC_INSTR  r3, 64  ; alloc next line w/o fetching
-
-#ifdef CONFIG_ARC_HAS_LL64
-       std.ab  r4, [r3, 8]
-       std.ab  r4, [r3, 8]
-       std.ab  r4, [r3, 8]
-       std.ab  r4, [r3, 8]
-       std.ab  r4, [r3, 8]
-       std.ab  r4, [r3, 8]
-       std.ab  r4, [r3, 8]
-       std.ab  r4, [r3, 8]
-#else
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-#endif
-.Lset64bytes:
-
-       lsr.f   lp_count, r2, 5 ;Last remaining  max 124 bytes
-       lpnz    .Lset32bytes
-       ;; LOOP START
-#ifdef CONFIG_ARC_HAS_LL64
-       std.ab  r4, [r3, 8]
-       std.ab  r4, [r3, 8]
-       std.ab  r4, [r3, 8]
-       std.ab  r4, [r3, 8]
-#else
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-       st.ab   r4, [r3, 4]
-#endif
-.Lset32bytes:
-
-       and.f   lp_count, r2, 0x1F ;Last remaining 31 bytes
-.Lsmallchunk:
-       lpnz    .Lcopy3bytes
-       ;; LOOP START
+       ; Loop #a:
+       ; - Updates 1 cache line worth data (64 bytes) per iteration
+       ; - PREALLOC the next line.
+       ;
+       ; = Only entered if at least 2 lines worth of work (i.e. >= 128 bytes),
+       ;   else PREALLOC for next can "bleed" past end of buffer, causing data
+       ;   corruption issue if that line is owned by some other core.
+       ; = Last 64 bytes (even for min 128 bytes work) are NOT done here to
+       ;   avoid PREALLOC issue
+
+       sub     r6, r2, 64
+       cmp     r2, 64
+       bmsk.hi r2, r2, 5       ; trailing 63 bytes
+       mov.ls  r6, 0
+       add.hi  r2, r2, 64      ; line skipped in loop below
+
+       lsr.f   lp_count, r6, 6
+       lpnz    2f
+       PREALLOCR r3, 64
+       ST64.ab r4, r3, 8
+       ST64.ab r4, r3, 8
+       ST64.ab r4, r3, 8
+       ST64.ab r4, r3, 8
+       ST64.ab r4, r3, 8
+       ST64.ab r4, r3, 8
+       ST64.ab r4, r3, 8
+       ST64.ab r4, r3, 8
+2:
+       ; Loop #b: Remaining 32-byte chunks (up to 3, i.e. 32 / 64 / 96 bytes)
+       lsr.f   lp_count, r2, 5
+       lpnz    .Lbyteloop
+       ST64.ab r4, r3, 8
+       ST64.ab r4, r3, 8
+       ST64.ab r4, r3, 8
+       ST64.ab r4, r3, 8
+
+.Lbyteloop:
+       ; Loop #c: up to 31 straggler bytes, one byte per iteration
+       and.f   lp_count, r2, 0x1F
+       lpnz    4f
        stb.ab  r1, [r3, 1]
-.Lcopy3bytes:
-
+4:
        j       [blink]
 
 END_CFI(memset)
-- 
2.25.1


_______________________________________________
linux-snps-arc mailing list
linux-snps-arc@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-snps-arc

Reply via email to