Hello everyone,
This is an updated patch where I've made what was a predicate into a
pure C function based on some feedback outside the mailing list.
Ok for trunk?
Matthew
gcc/
2018-08-07 Matthew Malcomson <matthew.malcom...@arm.com>
* config/aarch64/aarch64-protos.h
(aarch64_armv8_4_offset_memory_operand): New declaration.
* config/aarch64/aarch64.c
(aarch64_armv8_4_offset_memory_operand): New.
* config/aarch64/aarch64.h (TARGET_ARMV8_4): Add feature macro.
* config/aarch64/atomics.md (atomic_store<mode>): Allow offset
and use stlur.
* config/aarch64/constraints.md (Ust): New constraint.
* config/aarch64/predicates.md
(aarch64_sync_or_offset_memory_operand): New predicate.
gcc/testsuite/
2018-08-07 Matthew Malcomson <matthew.malcom...@arm.com>
* gcc.target/aarch64/atomic-store.c: New.
############### Attachment also inlined for ease of reply
###############
diff --git a/gcc/config/aarch64/aarch64-protos.h
b/gcc/config/aarch64/aarch64-protos.h
index
ef95fc829b83886e2ff00e4664e31af916e99b0c..8a1c1eac75ad486777804cec9c313c49e129cc4d
100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -393,6 +393,7 @@ void aarch64_split_add_offset (scalar_int_mode, rtx,
rtx, rtx, rtx, rtx);
bool aarch64_mov_operand_p (rtx, machine_mode);
rtx aarch64_reverse_mask (machine_mode, unsigned int);
bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64);
+bool aarch64_armv8_4_offset_memory_operand (rtx, machine_mode);
char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx);
char *aarch64_output_sve_addvl_addpl (rtx, rtx, rtx);
char *aarch64_output_sve_inc_dec_immediate (const char *, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index
c1218503bab19323eee1cca8b7e4bea8fbfcf573..328512e11f4230e24223bc51e55bdca8b31f6a20
100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -237,6 +237,9 @@ extern unsigned aarch64_architecture_version;
/* ARMv8.3-A features. */
#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3)
+/* ARMv8.4-A features. */
+#define TARGET_ARMV8_4 (AARCH64_ISA_V8_4)
+
/* Make sure this is always defined so we don't have to check for ifdefs
but rather use normal ifs. */
#ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index
13b5448aca88555222481f0955237b6fdcbb38b9..607c4f8fc4786857db8f4c2848df18035ef42495
100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4501,6 +4501,38 @@ offset_12bit_unsigned_scaled_p (machine_mode
mode, poly_int64 offset)
&& IN_RANGE (multiple, 0, 4095));
}
+/* Return true if the rtx describes a memory operand consisting of a DImode
+ register offset with a 9 bit signed unscaled constant and we're
targeting
+ Armv8.4.
+ This function was created to test for a case where the STLUR instruction
will be
+ used. */
+bool
+aarch64_armv8_4_offset_memory_operand (rtx op, machine_mode mode)
+{
+ if (!TARGET_ARMV8_4)
+ return false;
+
+ if (!MEM_P (op))
+ return false;
+ rtx mem_op = XEXP (op, 0);
+
+ if (GET_CODE (mem_op) != PLUS)
+ return false;
+ rtx plus_op0 = XEXP (mem_op, 0);
+ rtx plus_op1 = XEXP (mem_op, 1);
+
+ /* STLUR instruction requires DImode register. */
+ if (GET_MODE (plus_op0) != DImode
+ || !REG_P (plus_op0))
+ return false;
+
+ poly_int64 offset;
+ if (!poly_int_rtx_p (plus_op1, &offset))
+ return false;
+
+ return offset_9bit_signed_unscaled_p (mode, offset);
+}
+
/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
static sbitmap
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index
36c06756a1f94cadae097b3aad654fbeba1cf2f3..41b9845db00fccb3781d91cb3b95680b5c51eb11
100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -481,9 +481,9 @@
)
(define_insn "atomic_store<mode>"
- [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "=Q")
+ [(set (match_operand:ALLI 0 "aarch64_sync_or_offset_memory_operand"
"=Q,Ust")
(unspec_volatile:ALLI
- [(match_operand:ALLI 1 "general_operand" "rZ")
+ [(match_operand:ALLI 1 "general_operand" "rZ,rZ")
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPECV_STL))]
""
@@ -491,8 +491,10 @@
enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
if (is_mm_relaxed (model) || is_mm_consume (model) ||
is_mm_acquire (model))
return "str<atomic_sfx>\t%<w>1, %0";
- else
+ else if (which_alternative == 0)
return "stlr<atomic_sfx>\t%<w>1, %0";
+ else
+ return "stlur<atomic_sfx>\t%<w>1, %0";
}
)
diff --git a/gcc/config/aarch64/constraints.md
b/gcc/config/aarch64/constraints.md
index
72cacdabdac52dcb40b480f7a5bfbf4997c742d8..40cc2f0143221aa2cd9ee0e5abb79640467ab03e
100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -218,6 +218,11 @@
(and (match_code "mem")
(match_test "REG_P (XEXP (op, 0))")))
+(define_special_memory_constraint "Ust"
+ "@internal
+ A memory address with 9-bit unscaled offset for an Armv8.4-A instruction."
+ (match_test "aarch64_armv8_4_offset_memory_operand (op, mode)"))
+
(define_memory_constraint "Ump"
"@internal
A memory address suitable for a load/store pair operation."
diff --git a/gcc/config/aarch64/predicates.md
b/gcc/config/aarch64/predicates.md
index
d8f377b9603e76a29dd92f95e9905121eaf7b800..c36ca9f1c2dfdaec0f7bf0a363294001828a32d0
100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -359,6 +359,14 @@
(and (match_operand 0 "memory_operand")
(match_code "reg" "0")))
+;; True if the operand is a memory reference valid for either a stlr or stlur
+;; operation.
+(define_predicate "aarch64_sync_or_offset_memory_operand"
+ (ior (match_operand 0 "aarch64_sync_memory_operand")
+ (and (match_operand 0 "memory_operand")
+ (match_test "aarch64_armv8_4_offset_memory_operand (op, mode)"))))
+
+
;; Predicates for parallel expanders based on mode.
(define_special_predicate "vect_par_cnst_hi_half"
(match_code "parallel")
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-store.c
b/gcc/testsuite/gcc.target/aarch64/atomic-store.c
new file mode 100644
index
0000000000000000000000000000000000000000..7a9a9ff854a670977f307358e80ff60b8b329037
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-store.c
@@ -0,0 +1,74 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.4-a -O2" } */
+
+#include <stdlib.h>
+#include <stdatomic.h>
+#include <stdint.h>
+
+#define STORE_TESTS(size) \
+ void \
+ foo##size () \
+{ \
+ int##size##_t *atomic_vals = calloc (4, sizeof (int##size##_t)); \
+ atomic_store_explicit (atomic_vals, 2, memory_order_relaxed); \
+ atomic_store_explicit (atomic_vals, 2, memory_order_release); \
+ atomic_store_explicit ((atomic_vals + 1), 2, memory_order_release); \
+ atomic_store ((atomic_vals + 2), 2); \
+ atomic_store_explicit ((atomic_vals + 3), 2, memory_order_relaxed); \
+}
+
+STORE_TESTS (8);
+/* { dg-final { scan-assembler-times "strb\tw\[0-9\]+,
\\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlrb\tw\[0-9\]+,
\\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlurb\tw\[0-9\]+, \\\[x\[0-9\]+,
1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlurb\tw\[0-9\]+, \\\[x\[0-9\]+,
2\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "strb\tw\[0-9\]+, \\\[x\[0-9\]+,
3\\\]" 1 } } */
+
+STORE_TESTS (16);
+/* { dg-final { scan-assembler-times "strh\tw\[0-9\]+,
\\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlrh\tw\[0-9\]+,
\\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlurh\tw\[0-9\]+, \\\[x\[0-9\]+,
2\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlurh\tw\[0-9\]+, \\\[x\[0-9\]+,
4\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "strh\tw\[0-9\]+, \\\[x\[0-9\]+,
6\\\]" 1 } } */
+
+STORE_TESTS (32);
+/* { dg-final { scan-assembler-times "str\tw\[0-9\]+,
\\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlr\tw\[0-9\]+,
\\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlur\tw\[0-9\]+, \\\[x\[0-9\]+,
4\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlur\tw\[0-9\]+, \\\[x\[0-9\]+,
8\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "str\tw\[0-9\]+, \\\[x\[0-9\]+,
12\\\]" 1 } } */
+
+STORE_TESTS (64);
+/* { dg-final { scan-assembler-times "str\tx\[0-9\]+,
\\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlur\tx\[0-9\]+, \\\[x\[0-9\]+,
8\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlur\tx\[0-9\]+, \\\[x\[0-9\]+,
16\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "str\tx\[0-9\]+, \\\[x\[0-9\]+,
24\\\]" 1 } } */
+
+void
+foo_toolarge_offset ()
+{
+ int64_t *atomic_vals = calloc (4, sizeof (int64_t));
+ /* 9-bit signed unscaled immediate =>
+ largest representable value +255.
+ smallest representable value -256. */
+ atomic_store_explicit (atomic_vals + 32, 2, memory_order_release);
+ atomic_store_explicit (atomic_vals - 33, 2, memory_order_release);
+}
+
+void
+foo_negative (int8_t *atomic_vals)
+{
+ atomic_store_explicit (atomic_vals - 2, 2, memory_order_release);
+}
+/* { dg-final { scan-assembler-times "stlurb\tw\[0-9\]+, \\\[x\[0-9\]+,
-2\\\]" 1 } } */
+
+#pragma GCC target ("arch=armv8.3-a")
+void
+foo_older_arch ()
+{
+ int64_t *atomic_vals = calloc (4, sizeof (int64_t));
+ atomic_store_explicit (atomic_vals + 2, 2, memory_order_release);
+}
+
+/* Four times: one each from foo64 and foo_older_arch, two from foo_toolarge_offset. */
+/* { dg-final { scan-assembler-times "stlr\tx\[0-9\]+,
\\\[x\[0-9\]+\\\]" 4 } } */
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index ef95fc829b83886e2ff00e4664e31af916e99b0c..8a1c1eac75ad486777804cec9c313c49e129cc4d 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -393,6 +393,7 @@ void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx);
bool aarch64_mov_operand_p (rtx, machine_mode);
rtx aarch64_reverse_mask (machine_mode, unsigned int);
bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64);
+bool aarch64_armv8_4_offset_memory_operand (rtx, machine_mode);
char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx);
char *aarch64_output_sve_addvl_addpl (rtx, rtx, rtx);
char *aarch64_output_sve_inc_dec_immediate (const char *, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index c1218503bab19323eee1cca8b7e4bea8fbfcf573..328512e11f4230e24223bc51e55bdca8b31f6a20 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -237,6 +237,9 @@ extern unsigned aarch64_architecture_version;
/* ARMv8.3-A features. */
#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3)
+/* ARMv8.4-A features. */
+#define TARGET_ARMV8_4 (AARCH64_ISA_V8_4)
+
/* Make sure this is always defined so we don't have to check for ifdefs
but rather use normal ifs. */
#ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 13b5448aca88555222481f0955237b6fdcbb38b9..607c4f8fc4786857db8f4c2848df18035ef42495 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4501,6 +4501,38 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
&& IN_RANGE (multiple, 0, 4095));
}
+/* Return true if the rtx describes a memory operand consisting of a DImode
+ register offset with a 9 bit signed unscaled constant and we're targeting
+ Armv8.4.
+ This function was created to test for a case where the STLUR instruction will be
+ used. */
+bool
+aarch64_armv8_4_offset_memory_operand (rtx op, machine_mode mode)
+{
+ if (!TARGET_ARMV8_4)
+ return false;
+
+ if (!MEM_P (op))
+ return false;
+ rtx mem_op = XEXP (op, 0);
+
+ if (GET_CODE (mem_op) != PLUS)
+ return false;
+ rtx plus_op0 = XEXP (mem_op, 0);
+ rtx plus_op1 = XEXP (mem_op, 1);
+
+ /* STLUR instruction requires DImode register. */
+ if (GET_MODE (plus_op0) != DImode
+ || !REG_P (plus_op0))
+ return false;
+
+ poly_int64 offset;
+ if (!poly_int_rtx_p (plus_op1, &offset))
+ return false;
+
+ return offset_9bit_signed_unscaled_p (mode, offset);
+}
+
/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
static sbitmap
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 36c06756a1f94cadae097b3aad654fbeba1cf2f3..41b9845db00fccb3781d91cb3b95680b5c51eb11 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -481,9 +481,9 @@
)
(define_insn "atomic_store<mode>"
- [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "=Q")
+ [(set (match_operand:ALLI 0 "aarch64_sync_or_offset_memory_operand" "=Q,Ust")
(unspec_volatile:ALLI
- [(match_operand:ALLI 1 "general_operand" "rZ")
+ [(match_operand:ALLI 1 "general_operand" "rZ,rZ")
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPECV_STL))]
""
@@ -491,8 +491,10 @@
enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model))
return "str<atomic_sfx>\t%<w>1, %0";
- else
+ else if (which_alternative == 0)
return "stlr<atomic_sfx>\t%<w>1, %0";
+ else
+ return "stlur<atomic_sfx>\t%<w>1, %0";
}
)
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 72cacdabdac52dcb40b480f7a5bfbf4997c742d8..40cc2f0143221aa2cd9ee0e5abb79640467ab03e 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -218,6 +218,11 @@
(and (match_code "mem")
(match_test "REG_P (XEXP (op, 0))")))
+(define_special_memory_constraint "Ust"
+ "@internal
+ A memory address with 9-bit unscaled offset for an Armv8.4-A instruction."
+ (match_test "aarch64_armv8_4_offset_memory_operand (op, mode)"))
+
(define_memory_constraint "Ump"
"@internal
A memory address suitable for a load/store pair operation."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index d8f377b9603e76a29dd92f95e9905121eaf7b800..c36ca9f1c2dfdaec0f7bf0a363294001828a32d0 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -359,6 +359,14 @@
(and (match_operand 0 "memory_operand")
(match_code "reg" "0")))
+;; True if the operand is a memory reference valid for either a stlr or stlur
+;; operation.
+(define_predicate "aarch64_sync_or_offset_memory_operand"
+ (ior (match_operand 0 "aarch64_sync_memory_operand")
+ (and (match_operand 0 "memory_operand")
+ (match_test "aarch64_armv8_4_offset_memory_operand (op, mode)"))))
+
+
;; Predicates for parallel expanders based on mode.
(define_special_predicate "vect_par_cnst_hi_half"
(match_code "parallel")
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-store.c b/gcc/testsuite/gcc.target/aarch64/atomic-store.c
new file mode 100644
index 0000000000000000000000000000000000000000..7a9a9ff854a670977f307358e80ff60b8b329037
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-store.c
@@ -0,0 +1,74 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.4-a -O2" } */
+
+#include <stdlib.h>
+#include <stdatomic.h>
+#include <stdint.h>
+
+#define STORE_TESTS(size) \
+ void \
+ foo##size () \
+{ \
+ int##size##_t *atomic_vals = calloc (4, sizeof (int##size##_t)); \
+ atomic_store_explicit (atomic_vals, 2, memory_order_relaxed); \
+ atomic_store_explicit (atomic_vals, 2, memory_order_release); \
+ atomic_store_explicit ((atomic_vals + 1), 2, memory_order_release); \
+ atomic_store ((atomic_vals + 2), 2); \
+ atomic_store_explicit ((atomic_vals + 3), 2, memory_order_relaxed); \
+}
+
+STORE_TESTS (8);
+/* { dg-final { scan-assembler-times "strb\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlrb\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlurb\tw\[0-9\]+, \\\[x\[0-9\]+, 1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlurb\tw\[0-9\]+, \\\[x\[0-9\]+, 2\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "strb\tw\[0-9\]+, \\\[x\[0-9\]+, 3\\\]" 1 } } */
+
+STORE_TESTS (16);
+/* { dg-final { scan-assembler-times "strh\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlrh\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlurh\tw\[0-9\]+, \\\[x\[0-9\]+, 2\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlurh\tw\[0-9\]+, \\\[x\[0-9\]+, 4\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "strh\tw\[0-9\]+, \\\[x\[0-9\]+, 6\\\]" 1 } } */
+
+STORE_TESTS (32);
+/* { dg-final { scan-assembler-times "str\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlur\tw\[0-9\]+, \\\[x\[0-9\]+, 4\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlur\tw\[0-9\]+, \\\[x\[0-9\]+, 8\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "str\tw\[0-9\]+, \\\[x\[0-9\]+, 12\\\]" 1 } } */
+
+STORE_TESTS (64);
+/* { dg-final { scan-assembler-times "str\tx\[0-9\]+, \\\[x\[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlur\tx\[0-9\]+, \\\[x\[0-9\]+, 8\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "stlur\tx\[0-9\]+, \\\[x\[0-9\]+, 16\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "str\tx\[0-9\]+, \\\[x\[0-9\]+, 24\\\]" 1 } } */
+
+void
+foo_toolarge_offset ()
+{
+ int64_t *atomic_vals = calloc (4, sizeof (int64_t));
+ /* 9-bit signed unscaled immediate =>
+ largest representable value +255.
+ smallest representable value -256. */
+ atomic_store_explicit (atomic_vals + 32, 2, memory_order_release);
+ atomic_store_explicit (atomic_vals - 33, 2, memory_order_release);
+}
+
+void
+foo_negative (int8_t *atomic_vals)
+{
+ atomic_store_explicit (atomic_vals - 2, 2, memory_order_release);
+}
+/* { dg-final { scan-assembler-times "stlurb\tw\[0-9\]+, \\\[x\[0-9\]+, -2\\\]" 1 } } */
+
+#pragma GCC target ("arch=armv8.3-a")
+void
+foo_older_arch ()
+{
+ int64_t *atomic_vals = calloc (4, sizeof (int64_t));
+ atomic_store_explicit (atomic_vals + 2, 2, memory_order_release);
+}
+
+/* Four times: one each from foo64 and foo_older_arch, two from foo_toolarge_offset. */
+/* { dg-final { scan-assembler-times "stlr\tx\[0-9\]+, \\\[x\[0-9\]+\\\]" 4 } } */