Without atomic_fetch_nandsi and atomic_fetch_nanddi, __atomic_fetch_nand
is expanded to a loop with a compare-and-swap (CAS) in its body, and on
LoongArch the CAS is itself an LL-SC loop, so we end up with a nested
loop.  This is clearly suboptimal: a single LL-SC loop is all we need.
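
For illustration, the generic fallback behaves roughly like the C sketch
below (a sketch only, not the exact expansion GCC emits; the function
name is made up):

  long
  fetch_nand_fallback (long *p, long mask)
  {
    long old = __atomic_load_n (p, __ATOMIC_RELAXED);

    /* Outer CAS loop; on LoongArch each weak compare-exchange below is
       itself expanded to an LL-SC loop, hence the nesting.  */
    while (!__atomic_compare_exchange_n (p, &old, ~(old & mask),
                                         /* weak */ true,
                                         __ATOMIC_SEQ_CST, __ATOMIC_RELAXED))
      ;

    return old;  /* __atomic_fetch_nand returns the old value.  */
  }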

As ~(atom & mask) is (~mask) | (~atom) (De Morgan), we can invert the
mask once outside the loop, and the body of the LL-SC loop then needs
only a single orn instruction.
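
A quick standalone check of the identity (a hypothetical helper, not
part of the patch; on LoongArch, orn rd,rj,rk computes rj | ~rk):

  #include <assert.h>

  static long
  nand_via_orn (long inv_mask, long atom)
  {
    return inv_mask | ~atom;  /* maps to one orn inside the LL-SC loop */
  }

  int
  main (void)
  {
    long atom = 0x1234, mask = 0xf0f0;
    assert (nand_via_orn (~mask, atom) == ~(atom & mask));
    return 0;
  }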

gcc/ChangeLog:

        * config/loongarch/sync.md
        (atomic_fetch_nand_mask_inverted<GPR:mode>): New define_insn.
        (atomic_fetch_nand<GPR:mode>): New define_expand.
---
 gcc/config/loongarch/sync.md | 40 ++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index 60038258f29..f1ab132163c 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -203,6 +203,46 @@ (define_insn "atomic_fetch_<amop><mode>"
   "am<amop>%A3.<size>\t%0,%z2,%1"
   [(set (attr "length") (const_int 4))])
 
+(define_insn "atomic_fetch_nand_mask_inverted<mode>"
+  [(set (match_operand:GPR 0 "register_operand" "=&r")
+       (match_operand:GPR 1 "memory_operand" "+ZC"))
+   (set (match_dup 1)
+       (unspec_volatile:GPR
+         [(ior:GPR (not (match_dup 1))
+                   (match_operand:GPR 2 "register_operand" "r"))]
+         UNSPEC_SYNC_OLD_OP))
+   (clobber (match_scratch:GPR 3 "=&r"))]
+  ""
+  {
+    return "1:\\n\\t"
+          "ll.<d>\\t%0,%1\\n\\t"
+          "orn\\t%3,%2,%0\\n\\t"
+          "sc.<d>\\t%3,%1\\n\\t"
+          "beqz\\t%3,1b";
+  }
+  [(set (attr "length") (const_int 16))])
+
+(define_expand "atomic_fetch_nand<mode>"
+  [(match_operand:GPR 0 "register_operand")
+   (match_operand:GPR 1 "memory_operand")
+   (match_operand:GPR 2 "reg_or_0_operand")
+   (match_operand:SI  3 "const_int_operand")]
+  ""
+  {
+    /* ~(atom & mask) = (~mask) | (~atom), so we can hoist
+       (~mask) out of the ll/sc loop and use the orn instruction in the
+       ll/sc loop.  */
+    rtx inverted_mask = gen_reg_rtx (<MODE>mode);
+    emit_move_insn (inverted_mask,
+                   expand_simple_unop (<MODE>mode, NOT, operands[2],
+                                       NULL_RTX, false));
+
+    emit_insn (
+      gen_atomic_fetch_nand_mask_inverted<mode> (operands[0], operands[1],
+                                                inverted_mask));
+    DONE;
+  })
+
 (define_insn "atomic_exchange<mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
        (unspec_volatile:GPR
-- 
2.48.1
