On Thu, 9 Jan 2020 at 16:53, Matthew Malcomson <matthew.malcom...@arm.com> wrote: > > We take no action to ensure the SVE vector size is large enough. It is > left to the user to check that before compiling this intrinsic or before > running such a program on a machine. > > The main difference between ld1ro and ld1rq is in the allowed offsets, > the implementation difference is that ld1ro is implemented using integer > modes since there are no pre-existing vector modes of the relevant size. > Adding new vector modes simply for this intrinsic seems to make the code > less tidy. > > Specifications can be found under the "Arm C Language Extensions for > Scalable Vector Extension" title at > https://developer.arm.com/architectures/system-architectures/software-standards/acle > > gcc/ChangeLog: > > 2020-01-09 Matthew Malcomson <matthew.malcom...@arm.com> > > * config/aarch64/aarch64-protos.h > (aarch64_sve_ld1ro_operand_p): New. > * config/aarch64/aarch64-sve-builtins-base.cc > (class load_replicate): New. > (class svld1ro_impl): New. > (class svld1rq_impl): Change to inherit from load_replicate. > (svld1ro): New sve intrinsic function base. > * config/aarch64/aarch64-sve-builtins-base.def (svld1ro): > New DEF_SVE_FUNCTION. > * config/aarch64/aarch64-sve-builtins-base.h > (svld1ro): New decl. > * config/aarch64/aarch64-sve-builtins.cc > (function_expander::add_mem_operand): Modify assert to allow > OImode. > * config/aarch64/aarch64-sve.md (@aarch64_sve_ld1ro<mode>): New > pattern. > * config/aarch64/aarch64.c > (aarch64_sve_ld1rq_operand_p): Implement in terms of ... > (aarch64_sve_ld1rq_ld1ro_operand_p): This. > (aarch64_sve_ld1ro_operand_p): New. > * config/aarch64/aarch64.md (UNSPEC_LD1RO): New unspec. > * config/aarch64/constraints.md (UOb,UOh,UOw,UOd): New. > * config/aarch64/predicates.md > (aarch64_sve_ld1ro_operand_{b,h,w,d}): New. > > gcc/testsuite/ChangeLog: > > 2020-01-09 Matthew Malcomson <matthew.malcom...@arm.com> > > * gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c: New test. > * gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c: New test. > * gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c: New test. > * gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c: New test. > * gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c: New test. > * gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c: New test. > * gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c: New test. > * gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c: New test. > * gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c: New test. > * gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c: New test. > * gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c: New test. 
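As a quick illustration of the first paragraph above, here is a minimal usage sketch (not part of the patch; the function name is made up, and the svcntb guard is just one way for the caller to check the vector length at run time rather than assume it). It needs the same -march=armv8.6-a+sve+f64mm option that the new tests pass:

#include <arm_sve.h>
#include <stdlib.h>

/* Replicate the 256-bit (octaword) block at BASE across the whole SVE
   vector.  LD1RO is only meaningful when the vector length is at least
   256 bits, so abort if that does not hold on this machine.  */
svfloat64_t
broadcast_256bit_block (const float64_t *base)
{
  if (svcntb () < 32)   /* svcntb () gives the vector length in bytes.  */
    abort ();
  return svld1ro_f64 (svptrue_b64 (), base);
}
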
> > > > ############### Attachment also inlined for ease of reply > ############### > > > diff --git a/gcc/config/aarch64/aarch64-protos.h > b/gcc/config/aarch64/aarch64-protos.h > index > c16b9362ea986ff221755bfc4d10bae674a67ed4..6d2162b93932e433677dae48e5c58975be2902d2 > 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -582,6 +582,7 @@ rtx aarch64_simd_gen_const_vector_dup (machine_mode, > HOST_WIDE_INT); > bool aarch64_simd_mem_operand_p (rtx); > bool aarch64_sve_ld1r_operand_p (rtx); > bool aarch64_sve_ld1rq_operand_p (rtx); > +bool aarch64_sve_ld1ro_operand_p (rtx, scalar_mode); > bool aarch64_sve_ldff1_operand_p (rtx); > bool aarch64_sve_ldnf1_operand_p (rtx); > bool aarch64_sve_ldr_operand_p (rtx); > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc > b/gcc/config/aarch64/aarch64-sve-builtins-base.cc > index > 38bd3adce1ebbde4c58531ffd26eedd4ae4938b0..e52a6012565fadd84cdd77a613f887e5ae53a576 > 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc > @@ -1139,7 +1139,7 @@ public: > } > }; > > -class svld1rq_impl : public function_base > +class load_replicate : public function_base > { > public: > unsigned int > @@ -1153,7 +1153,11 @@ public: > { > return fi.scalar_type (0); > } > +}; > > +class svld1rq_impl : public load_replicate > +{ > +public: > machine_mode > memory_vector_mode (const function_instance &fi) const OVERRIDE > { > @@ -1168,6 +1172,23 @@ public: > } > }; > > +class svld1ro_impl : public load_replicate > +{ > +public: > + machine_mode > + memory_vector_mode (const function_instance &fi) const OVERRIDE > + { > + return OImode; > + } > + > + rtx > + expand (function_expander &e) const OVERRIDE > + { > + insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0)); > + return e.use_contiguous_load_insn (icode); > + } > +}; > + > /* Implements svld2, svld3 and svld4. 
*/ > class svld234_impl : public full_width_access > { > @@ -2571,6 +2592,7 @@ FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA)) > FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB)) > FUNCTION (svld1, svld1_impl,) > FUNCTION (svld1_gather, svld1_gather_impl,) > +FUNCTION (svld1ro, svld1ro_impl,) > FUNCTION (svld1rq, svld1rq_impl,) > FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8)) > FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8)) > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def > b/gcc/config/aarch64/aarch64-sve-builtins-base.def > index > a678ee8f4eebad5a8be113968a08185f1fe848d7..d0a761720c435b934b8bca43a70ab9cccd2edc2e > 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.def > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def > @@ -316,3 +316,7 @@ DEF_SVE_FUNCTION (svzip1, binary_pred, all_pred, none) > DEF_SVE_FUNCTION (svzip2, binary, all_data, none) > DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none) > #undef REQUIRED_EXTENSIONS > + > +#define REQUIRED_EXTENSIONS AARCH64_FL_V8_6 | AARCH64_FL_F64MM > +DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) > +#undef REQUIRED_EXTENSIONS > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h > b/gcc/config/aarch64/aarch64-sve-builtins-base.h > index > 41ab12f4e35460603657e3b4f324545b5d10442f..d88aed8e30b97c6ad0a1699b971588e2aeaebcfa > 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.h > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h > @@ -108,6 +108,7 @@ namespace aarch64_sve > extern const function_base *const svlastb; > extern const function_base *const svld1; > extern const function_base *const svld1_gather; > + extern const function_base *const svld1ro; > extern const function_base *const svld1rq; > extern const function_base *const svld1sb; > extern const function_base *const svld1sb_gather; > diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc > b/gcc/config/aarch64/aarch64-sve-builtins.cc > index > 88a8b791df713ddacaf3808588ce864416886c57..6089cf23281e54f5b0f32ff5b1858db47f705ee5 > 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins.cc > +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc > @@ -2525,7 +2525,11 @@ function_expander::add_integer_operand (HOST_WIDE_INT > x) > void > function_expander::add_mem_operand (machine_mode mode, rtx addr) > { > - gcc_assert (VECTOR_MODE_P (mode)); > + /* Exception for OImode for the ld1ro intrinsics. > + They act on 256 bit octaword data, and it's just easier to use a scalar > + mode to represent that than add a new vector mode solely for the purpose > + of this intrinsic. */ > + gcc_assert (VECTOR_MODE_P (mode) || mode == OImode); > rtx mem = gen_rtx_MEM (mode, memory_address (mode, addr)); > /* The memory is only guaranteed to be element-aligned. 
*/ > set_mem_align (mem, GET_MODE_ALIGNMENT (GET_MODE_INNER (mode))); > diff --git a/gcc/config/aarch64/aarch64-sve.md > b/gcc/config/aarch64/aarch64-sve.md > index > 4427609b57907c47f6abd23a6137babd65586e3f..202503a4f336358f4ffbcb2f3c8f1210882852df > 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -2494,6 +2494,20 @@ > } > ) > > +(define_insn "@aarch64_sve_ld1ro<mode>" > + [(set (match_operand:SVE_FULL 0 "register_operand" "=w") > + (unspec:SVE_FULL > + [(match_operand:<VPRED> 2 "register_operand" "Upl") > + (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>" > + "UO<Vesize>")] > + UNSPEC_LD1RO))] > + "TARGET_SVE && TARGET_ARMV8_6" > + { > + operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0)); > + return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1"; > + } > +) > + > ;; ------------------------------------------------------------------------- > ;; ---- [INT,FP] Initialize from individual elements > ;; ------------------------------------------------------------------------- > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index > a3b18b381e1748f8fe5e522bdec4f7c850821fe8..f1b1a68fbc33449b6e6bd8cca1badfd41ac5a424 > 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -17073,18 +17073,20 @@ aarch64_sve_ld1r_operand_p (rtx op) > && offset_6bit_unsigned_scaled_p (mode, addr.const_offset)); > } > > -/* Return true if OP is a valid MEM operand for an SVE LD1RQ instruction. */ > +/* Return true if OP is a valid MEM operand for an SVE LD1R{Q,O} instruction > + where the size of the read data is specified by `mode` and the size of the > + vector elements are specified by `elem_mode`. */ > bool > -aarch64_sve_ld1rq_operand_p (rtx op) > +aarch64_sve_ld1rq_ld1ro_operand_p (rtx op, machine_mode mode, > + scalar_mode elem_mode) > { > struct aarch64_address_info addr; > - scalar_mode elem_mode = GET_MODE_INNER (GET_MODE (op)); > if (!MEM_P (op) > || !aarch64_classify_address (&addr, XEXP (op, 0), elem_mode, false)) > return false; > > if (addr.type == ADDRESS_REG_IMM) > - return offset_4bit_signed_scaled_p (TImode, addr.const_offset); > + return offset_4bit_signed_scaled_p (mode, addr.const_offset); > > if (addr.type == ADDRESS_REG_REG) > return (1U << addr.shift) == GET_MODE_SIZE (elem_mode); > @@ -17092,6 +17094,22 @@ aarch64_sve_ld1rq_operand_p (rtx op) > return false; > } > > +/* Return true if OP is a valid MEM operand for an SVE LD1RQ instruction. */ > +bool > +aarch64_sve_ld1rq_operand_p (rtx op) > +{ > + return aarch64_sve_ld1rq_ld1ro_operand_p (op, TImode, > + GET_MODE_INNER (GET_MODE (op))); > +} > + > +/* Return true if OP is a valid MEM operand for an SVE LD1RO instruction for > + accessing a vector where the element size is specified by `elem_mode`. */ > +bool > +aarch64_sve_ld1ro_operand_p (rtx op, scalar_mode elem_mode) > +{ > + return aarch64_sve_ld1rq_ld1ro_operand_p (op, OImode, elem_mode); > +} > + > /* Return true if OP is a valid MEM operand for an SVE LDFF1 instruction. */ > bool > aarch64_sve_ldff1_operand_p (rtx op) > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index > b11ead7ab23d78bb5b45662fecb73dd0f4e0fda0..db8aa40c8bfa7558a590e5fc25841cb8cab3c9fc > 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -273,6 +273,7 @@ > UNSPEC_GEN_TAG ; Generate a 4-bit MTE tag. > UNSPEC_GEN_TAG_RND ; Generate a random 4-bit MTE tag. > UNSPEC_TAG_SPACE ; Translate address to MTE tag address space. 
> + UNSPEC_LD1RO > ]) > > (define_c_enum "unspecv" [ > diff --git a/gcc/config/aarch64/constraints.md > b/gcc/config/aarch64/constraints.md > index > b9e5d13e851912c2d5b27a2d0dbc764bde3fa36f..a2d6b7d49e9cdf058463cb8acec8f583fd6f7eef > 100644 > --- a/gcc/config/aarch64/constraints.md > +++ b/gcc/config/aarch64/constraints.md > @@ -320,6 +320,31 @@ > (and (match_code "mem") > (match_test "aarch64_sve_ld1rq_operand_p (op)"))) > > +(define_memory_constraint "UOb" > + "@internal > + An address valid for SVE LD1ROH." > + (and (match_code "mem") > + (match_test "aarch64_sve_ld1ro_operand_p (op, QImode)"))) > + > +(define_memory_constraint "UOh" > + "@internal > + An address valid for SVE LD1ROH." > + (and (match_code "mem") > + (match_test "aarch64_sve_ld1ro_operand_p (op, HImode)"))) > + > + > +(define_memory_constraint "UOw" > + "@internal > + An address valid for SVE LD1ROW." > + (and (match_code "mem") > + (match_test "aarch64_sve_ld1ro_operand_p (op, SImode)"))) > + > +(define_memory_constraint "UOd" > + "@internal > + An address valid for SVE LD1ROD." > + (and (match_code "mem") > + (match_test "aarch64_sve_ld1ro_operand_p (op, DImode)"))) > + > (define_memory_constraint "Uty" > "@internal > An address valid for SVE LD1Rs." > diff --git a/gcc/config/aarch64/predicates.md > b/gcc/config/aarch64/predicates.md > index > da6779e790cb70203ea9920fadb5b926321d1576..55fa1b2a77017e7f0582b9d8999168cc2cbd5c99 > 100644 > --- a/gcc/config/aarch64/predicates.md > +++ b/gcc/config/aarch64/predicates.md > @@ -588,6 +588,22 @@ > (and (match_code "mem") > (match_test "aarch64_sve_ld1rq_operand_p (op)"))) > > +(define_predicate "aarch64_sve_ld1ro_operand_b" > + (and (match_code "mem") > + (match_test "aarch64_sve_ld1ro_operand_p (op, QImode)"))) > + > +(define_predicate "aarch64_sve_ld1ro_operand_h" > + (and (match_code "mem") > + (match_test "aarch64_sve_ld1ro_operand_p (op, HImode)"))) > + > +(define_predicate "aarch64_sve_ld1ro_operand_w" > + (and (match_code "mem") > + (match_test "aarch64_sve_ld1ro_operand_p (op, SImode)"))) > + > +(define_predicate "aarch64_sve_ld1ro_operand_d" > + (and (match_code "mem") > + (match_test "aarch64_sve_ld1ro_operand_p (op, DImode)"))) > +
> (define_predicate "aarch64_sve_ldff1_operand" > (and (match_code "mem") > (match_test "aarch64_sve_ldff1_operand_p (op)"))) Hi, > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c > b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..7badc75a43ab2009e9406afc04c980fc01834716 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c > @@ -0,0 +1,119 @@ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */ What is the binutils version requirement for this? Some validations using binutils-2.33.1 exhibit failures like: /xgcc -B/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-aarch64-none-linux-gnu/gcc3/gcc/ -fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers -fdiagnostics-color=never -fdiagnostics-urls=never -std=c90 -O0 -g -DTEST_FULL -march=armv8.2-a+sve -fno-ipa-icf -march=armv8.6-a+sve+f64mm -c -o ld1ro_s16.o /gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c Assembler messages: Error: unknown architecture `armv8.6-a+sve+f64mm' Error: unrecognized option -march=armv8.6-a+sve+f64mm compiler exited with status 1 FAIL: gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c -std=c90 -O0 -g -DTEST_FULL 1 blank line(s) in output FAIL: gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c -std=c90 -O0 -g -DTEST_FULL (test for excess errors) Excess errors: Assembler messages: Error: unknown architecture `armv8.6-a+sve+f64mm' Error: unrecognized option -march=armv8.6-a+sve+f64mm while other configurations using 2.32 binutils seem to pass this test: /xgcc -B/home/tcwg-buildslave/workspace/tcwg-buildfarm__0/_build/builds/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/gcc.git~master_rev_3684bbb022cd75da55e1457673f269980aa12cdf-stage2/gcc/ /home/tcwg-buildslave/workspace/tcwg-buildfarm__0/snapshots/gcc.git~master_rev_3684bbb022cd75da55e1457673f269980aa12cdf/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c -fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers -fdiagnostics-color=never -fdiagnostics-urls=never -std=c90 -O0 -g -DTEST_FULL -march=armv8.2-a+sve -fno-ipa-icf -march=armv8.6-a+sve+f64mm -S -o ld1ro_f16.s PASS: gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c -std=c90 -O0 -g -DTEST_FULL (test for excess errors) Ha... took me a while to realize that in the latter case we stop after generating the .s file and do not call the assembler... So... do we want/need additional consistency checks between gcc and gas versions? 
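One option (just a sketch, untested) would be to gate these tests on assembler support, so that an old gas makes them UNSUPPORTED instead of FAIL. The effective-target name below is an assumption, modelled on the existing aarch64_asm_<ext>_ok checks; it would need a matching proc in lib/target-supports.exp that probes whether the assembler accepts -march=armv8.6-a+sve+f64mm:

/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */
/* Hypothetical extra directive: skip when gas does not know f64mm.  */
/* { dg-require-effective-target aarch64_asm_f64mm_ok } */
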
Thanks, Christophe > + > +#include "test_sve_acle.h" > + > +/* > +** ld1ro_f16_base: > +** ld1roh z0\.h, p0/z, \[x0\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f16_base, svfloat16_t, float16_t, > + z0 = svld1ro_f16 (p0, x0), > + z0 = svld1ro (p0, x0)) > + > +/* > +** ld1ro_f16_index: > +** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f16_index, svfloat16_t, float16_t, > + z0 = svld1ro_f16 (p0, x0 + x1), > + z0 = svld1ro (p0, x0 + x1)) > + > +/* > +** ld1ro_f16_1: > +** add (x[0-9]+), x0, #?2 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f16_1, svfloat16_t, float16_t, > + z0 = svld1ro_f16 (p0, x0 + 1), > + z0 = svld1ro (p0, x0 + 1)) > + > +/* > +** ld1ro_f16_8: > +** add (x[0-9]+), x0, #?16 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f16_8, svfloat16_t, float16_t, > + z0 = svld1ro_f16 (p0, x0 + 8), > + z0 = svld1ro (p0, x0 + 8)) > + > +/* > +** ld1ro_f16_128: > +** add (x[0-9]+), x0, #?256 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f16_128, svfloat16_t, float16_t, > + z0 = svld1ro_f16 (p0, x0 + 128), > + z0 = svld1ro (p0, x0 + 128)) > + > +/* > +** ld1ro_f16_m1: > +** sub (x[0-9]+), x0, #?2 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f16_m1, svfloat16_t, float16_t, > + z0 = svld1ro_f16 (p0, x0 - 1), > + z0 = svld1ro (p0, x0 - 1)) > + > +/* > +** ld1ro_f16_m8: > +** sub (x[0-9]+), x0, #?16 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f16_m8, svfloat16_t, float16_t, > + z0 = svld1ro_f16 (p0, x0 - 8), > + z0 = svld1ro (p0, x0 - 8)) > + > +/* > +** ld1ro_f16_m144: > +** sub (x[0-9]+), x0, #?288 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f16_m144, svfloat16_t, float16_t, > + z0 = svld1ro_f16 (p0, x0 - 144), > + z0 = svld1ro (p0, x0 - 144)) > + > +/* > +** ld1ro_f16_16: > +** ld1roh z0\.h, p0/z, \[x0, #?32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f16_16, svfloat16_t, float16_t, > + z0 = svld1ro_f16 (p0, x0 + 16), > + z0 = svld1ro (p0, x0 + 16)) > + > +/* > +** ld1ro_f16_112: > +** ld1roh z0\.h, p0/z, \[x0, #?224\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f16_112, svfloat16_t, float16_t, > + z0 = svld1ro_f16 (p0, x0 + 112), > + z0 = svld1ro (p0, x0 + 112)) > + > +/* > +** ld1ro_f16_m16: > +** ld1roh z0\.h, p0/z, \[x0, #?-32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f16_m16, svfloat16_t, float16_t, > + z0 = svld1ro_f16 (p0, x0 - 16), > + z0 = svld1ro (p0, x0 - 16)) > + > +/* > +** ld1ro_f16_m128: > +** ld1roh z0\.h, p0/z, \[x0, #?-256\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f16_m128, svfloat16_t, float16_t, > + z0 = svld1ro_f16 (p0, x0 - 128), > + z0 = svld1ro (p0, x0 - 128)) > + > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c > b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..dd8a1c53cd0fb7b7acd0b92394f3977382ac26e0 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c > @@ -0,0 +1,119 @@ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */ > + > +#include "test_sve_acle.h" > + > +/* > +** ld1ro_f32_base: > +** ld1row z0\.s, p0/z, \[x0\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f32_base, svfloat32_t, float32_t, > + z0 = svld1ro_f32 (p0, x0), > + z0 = svld1ro (p0, x0)) > + > +/* > +** ld1ro_f32_index: > +** ld1row z0\.s, p0/z, \[x0, x1, lsl 2\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f32_index, svfloat32_t, float32_t, > + z0 = 
svld1ro_f32 (p0, x0 + x1), > + z0 = svld1ro (p0, x0 + x1)) > + > +/* > +** ld1ro_f32_1: > +** add (x[0-9]+), x0, #?4 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f32_1, svfloat32_t, float32_t, > + z0 = svld1ro_f32 (p0, x0 + 1), > + z0 = svld1ro (p0, x0 + 1)) > + > +/* > +** ld1ro_f32_4: > +** add (x[0-9]+), x0, #?16 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f32_4, svfloat32_t, float32_t, > + z0 = svld1ro_f32 (p0, x0 + 4), > + z0 = svld1ro (p0, x0 + 4)) > + > +/* > +** ld1ro_f32_64: > +** add (x[0-9]+), x0, #?256 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f32_64, svfloat32_t, float32_t, > + z0 = svld1ro_f32 (p0, x0 + 64), > + z0 = svld1ro (p0, x0 + 64)) > + > +/* > +** ld1ro_f32_m1: > +** sub (x[0-9]+), x0, #?4 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f32_m1, svfloat32_t, float32_t, > + z0 = svld1ro_f32 (p0, x0 - 1), > + z0 = svld1ro (p0, x0 - 1)) > + > +/* > +** ld1ro_f32_m4: > +** sub (x[0-9]+), x0, #?16 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f32_m4, svfloat32_t, float32_t, > + z0 = svld1ro_f32 (p0, x0 - 4), > + z0 = svld1ro (p0, x0 - 4)) > + > +/* > +** ld1ro_f32_m72: > +** sub (x[0-9]+), x0, #?288 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f32_m72, svfloat32_t, float32_t, > + z0 = svld1ro_f32 (p0, x0 - 72), > + z0 = svld1ro (p0, x0 - 72)) > + > +/* > +** ld1ro_f32_8: > +** ld1row z0\.s, p0/z, \[x0, #?32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f32_8, svfloat32_t, float32_t, > + z0 = svld1ro_f32 (p0, x0 + 8), > + z0 = svld1ro (p0, x0 + 8)) > + > +/* > +** ld1ro_f32_56: > +** ld1row z0\.s, p0/z, \[x0, #?224\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f32_56, svfloat32_t, float32_t, > + z0 = svld1ro_f32 (p0, x0 + 56), > + z0 = svld1ro (p0, x0 + 56)) > + > +/* > +** ld1ro_f32_m8: > +** ld1row z0\.s, p0/z, \[x0, #?-32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f32_m8, svfloat32_t, float32_t, > + z0 = svld1ro_f32 (p0, x0 - 8), > + z0 = svld1ro (p0, x0 - 8)) > + > +/* > +** ld1ro_f32_m64: > +** ld1row z0\.s, p0/z, \[x0, #?-256\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f32_m64, svfloat32_t, float32_t, > + z0 = svld1ro_f32 (p0, x0 - 64), > + z0 = svld1ro (p0, x0 - 64)) > + > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c > b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..30563698310f65060d34be4bef4c57a74ef9d734 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c > @@ -0,0 +1,119 @@ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */ > + > +#include "test_sve_acle.h" > + > +/* > +** ld1ro_f64_base: > +** ld1rod z0\.d, p0/z, \[x0\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f64_base, svfloat64_t, float64_t, > + z0 = svld1ro_f64 (p0, x0), > + z0 = svld1ro (p0, x0)) > + > +/* > +** ld1ro_f64_index: > +** ld1rod z0\.d, p0/z, \[x0, x1, lsl 3\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f64_index, svfloat64_t, float64_t, > + z0 = svld1ro_f64 (p0, x0 + x1), > + z0 = svld1ro (p0, x0 + x1)) > + > +/* > +** ld1ro_f64_1: > +** add (x[0-9]+), x0, #?8 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f64_1, svfloat64_t, float64_t, > + z0 = svld1ro_f64 (p0, x0 + 1), > + z0 = svld1ro (p0, x0 + 1)) > + > +/* > +** ld1ro_f64_2: > +** add (x[0-9]+), x0, #?16 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f64_2, svfloat64_t, 
float64_t, > + z0 = svld1ro_f64 (p0, x0 + 2), > + z0 = svld1ro (p0, x0 + 2)) > + > +/* > +** ld1ro_f64_32: > +** add (x[0-9]+), x0, #?256 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f64_32, svfloat64_t, float64_t, > + z0 = svld1ro_f64 (p0, x0 + 32), > + z0 = svld1ro (p0, x0 + 32)) > + > +/* > +** ld1ro_f64_m1: > +** sub (x[0-9]+), x0, #?8 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f64_m1, svfloat64_t, float64_t, > + z0 = svld1ro_f64 (p0, x0 - 1), > + z0 = svld1ro (p0, x0 - 1)) > + > +/* > +** ld1ro_f64_m2: > +** sub (x[0-9]+), x0, #?16 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f64_m2, svfloat64_t, float64_t, > + z0 = svld1ro_f64 (p0, x0 - 2), > + z0 = svld1ro (p0, x0 - 2)) > + > +/* > +** ld1ro_f64_m36: > +** sub (x[0-9]+), x0, #?288 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f64_m36, svfloat64_t, float64_t, > + z0 = svld1ro_f64 (p0, x0 - 36), > + z0 = svld1ro (p0, x0 - 36)) > + > +/* > +** ld1ro_f64_4: > +** ld1rod z0\.d, p0/z, \[x0, #?32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f64_4, svfloat64_t, float64_t, > + z0 = svld1ro_f64 (p0, x0 + 4), > + z0 = svld1ro (p0, x0 + 4)) > + > +/* > +** ld1ro_f64_28: > +** ld1rod z0\.d, p0/z, \[x0, #?224\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f64_28, svfloat64_t, float64_t, > + z0 = svld1ro_f64 (p0, x0 + 28), > + z0 = svld1ro (p0, x0 + 28)) > + > +/* > +** ld1ro_f64_m4: > +** ld1rod z0\.d, p0/z, \[x0, #?-32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f64_m4, svfloat64_t, float64_t, > + z0 = svld1ro_f64 (p0, x0 - 4), > + z0 = svld1ro (p0, x0 - 4)) > + > +/* > +** ld1ro_f64_m32: > +** ld1rod z0\.d, p0/z, \[x0, #?-256\] > +** ret > +*/ > +TEST_LOAD (ld1ro_f64_m32, svfloat64_t, float64_t, > + z0 = svld1ro_f64 (p0, x0 - 32), > + z0 = svld1ro (p0, x0 - 32)) > + > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c > b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..d4702fa6cc15e9f93751d8579cfecfd37759306e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c > @@ -0,0 +1,119 @@ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */ > + > +#include "test_sve_acle.h" > + > +/* > +** ld1ro_s16_base: > +** ld1roh z0\.h, p0/z, \[x0\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s16_base, svint16_t, int16_t, > + z0 = svld1ro_s16 (p0, x0), > + z0 = svld1ro (p0, x0)) > + > +/* > +** ld1ro_s16_index: > +** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s16_index, svint16_t, int16_t, > + z0 = svld1ro_s16 (p0, x0 + x1), > + z0 = svld1ro (p0, x0 + x1)) > + > +/* > +** ld1ro_s16_1: > +** add (x[0-9]+), x0, #?2 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s16_1, svint16_t, int16_t, > + z0 = svld1ro_s16 (p0, x0 + 1), > + z0 = svld1ro (p0, x0 + 1)) > + > +/* > +** ld1ro_s16_8: > +** add (x[0-9]+), x0, #?16 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s16_8, svint16_t, int16_t, > + z0 = svld1ro_s16 (p0, x0 + 8), > + z0 = svld1ro (p0, x0 + 8)) > + > +/* > +** ld1ro_s16_128: > +** add (x[0-9]+), x0, #?256 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s16_128, svint16_t, int16_t, > + z0 = svld1ro_s16 (p0, x0 + 128), > + z0 = svld1ro (p0, x0 + 128)) > + > +/* > +** ld1ro_s16_m1: > +** sub (x[0-9]+), x0, #?2 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s16_m1, 
svint16_t, int16_t, > + z0 = svld1ro_s16 (p0, x0 - 1), > + z0 = svld1ro (p0, x0 - 1)) > + > +/* > +** ld1ro_s16_m8: > +** sub (x[0-9]+), x0, #?16 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s16_m8, svint16_t, int16_t, > + z0 = svld1ro_s16 (p0, x0 - 8), > + z0 = svld1ro (p0, x0 - 8)) > + > +/* > +** ld1ro_s16_m144: > +** sub (x[0-9]+), x0, #?288 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s16_m144, svint16_t, int16_t, > + z0 = svld1ro_s16 (p0, x0 - 144), > + z0 = svld1ro (p0, x0 - 144)) > + > +/* > +** ld1ro_s16_16: > +** ld1roh z0\.h, p0/z, \[x0, #?32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s16_16, svint16_t, int16_t, > + z0 = svld1ro_s16 (p0, x0 + 16), > + z0 = svld1ro (p0, x0 + 16)) > + > +/* > +** ld1ro_s16_112: > +** ld1roh z0\.h, p0/z, \[x0, #?224\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s16_112, svint16_t, int16_t, > + z0 = svld1ro_s16 (p0, x0 + 112), > + z0 = svld1ro (p0, x0 + 112)) > + > +/* > +** ld1ro_s16_m16: > +** ld1roh z0\.h, p0/z, \[x0, #?-32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s16_m16, svint16_t, int16_t, > + z0 = svld1ro_s16 (p0, x0 - 16), > + z0 = svld1ro (p0, x0 - 16)) > + > +/* > +** ld1ro_s16_m128: > +** ld1roh z0\.h, p0/z, \[x0, #?-256\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s16_m128, svint16_t, int16_t, > + z0 = svld1ro_s16 (p0, x0 - 128), > + z0 = svld1ro (p0, x0 - 128)) > + > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c > b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..4604b0b5fbfb716ae814bf88f7acfe8bf0eaa9f5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c > @@ -0,0 +1,119 @@ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */ > + > +#include "test_sve_acle.h" > + > +/* > +** ld1ro_s32_base: > +** ld1row z0\.s, p0/z, \[x0\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s32_base, svint32_t, int32_t, > + z0 = svld1ro_s32 (p0, x0), > + z0 = svld1ro (p0, x0)) > + > +/* > +** ld1ro_s32_index: > +** ld1row z0\.s, p0/z, \[x0, x1, lsl 2\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s32_index, svint32_t, int32_t, > + z0 = svld1ro_s32 (p0, x0 + x1), > + z0 = svld1ro (p0, x0 + x1)) > + > +/* > +** ld1ro_s32_1: > +** add (x[0-9]+), x0, #?4 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s32_1, svint32_t, int32_t, > + z0 = svld1ro_s32 (p0, x0 + 1), > + z0 = svld1ro (p0, x0 + 1)) > + > +/* > +** ld1ro_s32_4: > +** add (x[0-9]+), x0, #?16 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s32_4, svint32_t, int32_t, > + z0 = svld1ro_s32 (p0, x0 + 4), > + z0 = svld1ro (p0, x0 + 4)) > + > +/* > +** ld1ro_s32_64: > +** add (x[0-9]+), x0, #?256 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s32_64, svint32_t, int32_t, > + z0 = svld1ro_s32 (p0, x0 + 64), > + z0 = svld1ro (p0, x0 + 64)) > + > +/* > +** ld1ro_s32_m1: > +** sub (x[0-9]+), x0, #?4 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s32_m1, svint32_t, int32_t, > + z0 = svld1ro_s32 (p0, x0 - 1), > + z0 = svld1ro (p0, x0 - 1)) > + > +/* > +** ld1ro_s32_m4: > +** sub (x[0-9]+), x0, #?16 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s32_m4, svint32_t, int32_t, > + z0 = svld1ro_s32 (p0, x0 - 4), > + z0 = svld1ro (p0, x0 - 4)) > + > +/* > +** ld1ro_s32_m72: > +** sub (x[0-9]+), x0, #?288 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s32_m72, svint32_t, 
int32_t, > + z0 = svld1ro_s32 (p0, x0 - 72), > + z0 = svld1ro (p0, x0 - 72)) > + > +/* > +** ld1ro_s32_8: > +** ld1row z0\.s, p0/z, \[x0, #?32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s32_8, svint32_t, int32_t, > + z0 = svld1ro_s32 (p0, x0 + 8), > + z0 = svld1ro (p0, x0 + 8)) > + > +/* > +** ld1ro_s32_56: > +** ld1row z0\.s, p0/z, \[x0, #?224\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s32_56, svint32_t, int32_t, > + z0 = svld1ro_s32 (p0, x0 + 56), > + z0 = svld1ro (p0, x0 + 56)) > + > +/* > +** ld1ro_s32_m8: > +** ld1row z0\.s, p0/z, \[x0, #?-32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s32_m8, svint32_t, int32_t, > + z0 = svld1ro_s32 (p0, x0 - 8), > + z0 = svld1ro (p0, x0 - 8)) > + > +/* > +** ld1ro_s32_m64: > +** ld1row z0\.s, p0/z, \[x0, #?-256\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s32_m64, svint32_t, int32_t, > + z0 = svld1ro_s32 (p0, x0 - 64), > + z0 = svld1ro (p0, x0 - 64)) > + > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c > b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..dac98b293fb88f733ffcaec04a8b9861c3c502bf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c > @@ -0,0 +1,119 @@ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */ > + > +#include "test_sve_acle.h" > + > +/* > +** ld1ro_s64_base: > +** ld1rod z0\.d, p0/z, \[x0\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s64_base, svint64_t, int64_t, > + z0 = svld1ro_s64 (p0, x0), > + z0 = svld1ro (p0, x0)) > + > +/* > +** ld1ro_s64_index: > +** ld1rod z0\.d, p0/z, \[x0, x1, lsl 3\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s64_index, svint64_t, int64_t, > + z0 = svld1ro_s64 (p0, x0 + x1), > + z0 = svld1ro (p0, x0 + x1)) > + > +/* > +** ld1ro_s64_1: > +** add (x[0-9]+), x0, #?8 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s64_1, svint64_t, int64_t, > + z0 = svld1ro_s64 (p0, x0 + 1), > + z0 = svld1ro (p0, x0 + 1)) > + > +/* > +** ld1ro_s64_2: > +** add (x[0-9]+), x0, #?16 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s64_2, svint64_t, int64_t, > + z0 = svld1ro_s64 (p0, x0 + 2), > + z0 = svld1ro (p0, x0 + 2)) > + > +/* > +** ld1ro_s64_32: > +** add (x[0-9]+), x0, #?256 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s64_32, svint64_t, int64_t, > + z0 = svld1ro_s64 (p0, x0 + 32), > + z0 = svld1ro (p0, x0 + 32)) > + > +/* > +** ld1ro_s64_m1: > +** sub (x[0-9]+), x0, #?8 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s64_m1, svint64_t, int64_t, > + z0 = svld1ro_s64 (p0, x0 - 1), > + z0 = svld1ro (p0, x0 - 1)) > + > +/* > +** ld1ro_s64_m2: > +** sub (x[0-9]+), x0, #?16 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s64_m2, svint64_t, int64_t, > + z0 = svld1ro_s64 (p0, x0 - 2), > + z0 = svld1ro (p0, x0 - 2)) > + > +/* > +** ld1ro_s64_m36: > +** sub (x[0-9]+), x0, #?288 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s64_m36, svint64_t, int64_t, > + z0 = svld1ro_s64 (p0, x0 - 36), > + z0 = svld1ro (p0, x0 - 36)) > + > +/* > +** ld1ro_s64_4: > +** ld1rod z0\.d, p0/z, \[x0, #?32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s64_4, svint64_t, int64_t, > + z0 = svld1ro_s64 (p0, x0 + 4), > + z0 = svld1ro (p0, x0 + 4)) > + > +/* > +** ld1ro_s64_28: > +** ld1rod z0\.d, p0/z, \[x0, #?224\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s64_28, svint64_t, int64_t, > + z0 = svld1ro_s64 (p0, x0 + 28), > + z0 = svld1ro (p0, x0 + 28)) > + > 
+/* > +** ld1ro_s64_m4: > +** ld1rod z0\.d, p0/z, \[x0, #?-32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s64_m4, svint64_t, int64_t, > + z0 = svld1ro_s64 (p0, x0 - 4), > + z0 = svld1ro (p0, x0 - 4)) > + > +/* > +** ld1ro_s64_m32: > +** ld1rod z0\.d, p0/z, \[x0, #?-256\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s64_m32, svint64_t, int64_t, > + z0 = svld1ro_s64 (p0, x0 - 32), > + z0 = svld1ro (p0, x0 - 32)) > + > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c > b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..17df5dbb0d7302d9d735b13fb97111a657efbbfc > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c > @@ -0,0 +1,119 @@ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */ > + > +#include "test_sve_acle.h" > + > +/* > +** ld1ro_s8_base: > +** ld1rob z0\.b, p0/z, \[x0\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s8_base, svint8_t, int8_t, > + z0 = svld1ro_s8 (p0, x0), > + z0 = svld1ro (p0, x0)) > + > +/* > +** ld1ro_s8_index: > +** ld1rob z0\.b, p0/z, \[x0, x1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s8_index, svint8_t, int8_t, > + z0 = svld1ro_s8 (p0, x0 + x1), > + z0 = svld1ro (p0, x0 + x1)) > + > +/* > +** ld1ro_s8_1: > +** add (x[0-9]+), x0, #?1 > +** ld1rob z0\.b, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s8_1, svint8_t, int8_t, > + z0 = svld1ro_s8 (p0, x0 + 1), > + z0 = svld1ro (p0, x0 + 1)) > + > +/* > +** ld1ro_s8_16: > +** add (x[0-9]+), x0, #?16 > +** ld1rob z0\.b, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s8_16, svint8_t, int8_t, > + z0 = svld1ro_s8 (p0, x0 + 16), > + z0 = svld1ro (p0, x0 + 16)) > + > +/* > +** ld1ro_s8_256: > +** add (x[0-9]+), x0, #?256 > +** ld1rob z0\.b, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s8_256, svint8_t, int8_t, > + z0 = svld1ro_s8 (p0, x0 + 256), > + z0 = svld1ro (p0, x0 + 256)) > + > +/* > +** ld1ro_s8_m1: > +** sub (x[0-9]+), x0, #?1 > +** ld1rob z0\.b, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s8_m1, svint8_t, int8_t, > + z0 = svld1ro_s8 (p0, x0 - 1), > + z0 = svld1ro (p0, x0 - 1)) > + > +/* > +** ld1ro_s8_m16: > +** sub (x[0-9]+), x0, #?16 > +** ld1rob z0\.b, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s8_m16, svint8_t, int8_t, > + z0 = svld1ro_s8 (p0, x0 - 16), > + z0 = svld1ro (p0, x0 - 16)) > + > +/* > +** ld1ro_s8_m288: > +** sub (x[0-9]+), x0, #?288 > +** ld1rob z0\.b, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s8_m288, svint8_t, int8_t, > + z0 = svld1ro_s8 (p0, x0 - 288), > + z0 = svld1ro (p0, x0 - 288)) > + > +/* > +** ld1ro_s8_32: > +** ld1rob z0\.b, p0/z, \[x0, #?32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s8_32, svint8_t, int8_t, > + z0 = svld1ro_s8 (p0, x0 + 32), > + z0 = svld1ro (p0, x0 + 32)) > + > +/* > +** ld1ro_s8_224: > +** ld1rob z0\.b, p0/z, \[x0, #?224\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s8_224, svint8_t, int8_t, > + z0 = svld1ro_s8 (p0, x0 + 224), > + z0 = svld1ro (p0, x0 + 224)) > + > +/* > +** ld1ro_s8_m32: > +** ld1rob z0\.b, p0/z, \[x0, #?-32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s8_m32, svint8_t, int8_t, > + z0 = svld1ro_s8 (p0, x0 - 32), > + z0 = svld1ro (p0, x0 - 32)) > + > +/* > +** ld1ro_s8_m256: > +** ld1rob z0\.b, p0/z, \[x0, #?-256\] > +** ret > +*/ > +TEST_LOAD (ld1ro_s8_m256, svint8_t, int8_t, > + z0 = svld1ro_s8 (p0, x0 - 256), > + z0 = svld1ro (p0, x0 - 256)) > + > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c > 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..611e9166b0ff6d98ab3d05799072484a623ae3a0 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c > @@ -0,0 +1,119 @@ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */ > + > +#include "test_sve_acle.h" > + > +/* > +** ld1ro_u16_base: > +** ld1roh z0\.h, p0/z, \[x0\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u16_base, svuint16_t, uint16_t, > + z0 = svld1ro_u16 (p0, x0), > + z0 = svld1ro (p0, x0)) > + > +/* > +** ld1ro_u16_index: > +** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u16_index, svuint16_t, uint16_t, > + z0 = svld1ro_u16 (p0, x0 + x1), > + z0 = svld1ro (p0, x0 + x1)) > + > +/* > +** ld1ro_u16_1: > +** add (x[0-9]+), x0, #?2 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u16_1, svuint16_t, uint16_t, > + z0 = svld1ro_u16 (p0, x0 + 1), > + z0 = svld1ro (p0, x0 + 1)) > + > +/* > +** ld1ro_u16_8: > +** add (x[0-9]+), x0, #?16 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u16_8, svuint16_t, uint16_t, > + z0 = svld1ro_u16 (p0, x0 + 8), > + z0 = svld1ro (p0, x0 + 8)) > + > +/* > +** ld1ro_u16_128: > +** add (x[0-9]+), x0, #?256 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u16_128, svuint16_t, uint16_t, > + z0 = svld1ro_u16 (p0, x0 + 128), > + z0 = svld1ro (p0, x0 + 128)) > + > +/* > +** ld1ro_u16_m1: > +** sub (x[0-9]+), x0, #?2 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u16_m1, svuint16_t, uint16_t, > + z0 = svld1ro_u16 (p0, x0 - 1), > + z0 = svld1ro (p0, x0 - 1)) > + > +/* > +** ld1ro_u16_m8: > +** sub (x[0-9]+), x0, #?16 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u16_m8, svuint16_t, uint16_t, > + z0 = svld1ro_u16 (p0, x0 - 8), > + z0 = svld1ro (p0, x0 - 8)) > + > +/* > +** ld1ro_u16_m144: > +** sub (x[0-9]+), x0, #?288 > +** ld1roh z0\.h, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u16_m144, svuint16_t, uint16_t, > + z0 = svld1ro_u16 (p0, x0 - 144), > + z0 = svld1ro (p0, x0 - 144)) > + > +/* > +** ld1ro_u16_16: > +** ld1roh z0\.h, p0/z, \[x0, #?32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u16_16, svuint16_t, uint16_t, > + z0 = svld1ro_u16 (p0, x0 + 16), > + z0 = svld1ro (p0, x0 + 16)) > + > +/* > +** ld1ro_u16_112: > +** ld1roh z0\.h, p0/z, \[x0, #?224\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u16_112, svuint16_t, uint16_t, > + z0 = svld1ro_u16 (p0, x0 + 112), > + z0 = svld1ro (p0, x0 + 112)) > + > +/* > +** ld1ro_u16_m16: > +** ld1roh z0\.h, p0/z, \[x0, #?-32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u16_m16, svuint16_t, uint16_t, > + z0 = svld1ro_u16 (p0, x0 - 16), > + z0 = svld1ro (p0, x0 - 16)) > + > +/* > +** ld1ro_u16_m128: > +** ld1roh z0\.h, p0/z, \[x0, #?-256\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u16_m128, svuint16_t, uint16_t, > + z0 = svld1ro_u16 (p0, x0 - 128), > + z0 = svld1ro (p0, x0 - 128)) > + > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c > b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..7cb5bb93aa008272f9d765be567f084da0adc9a6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c > @@ -0,0 +1,119 @@ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */ > + > +#include 
"test_sve_acle.h" > + > +/* > +** ld1ro_u32_base: > +** ld1row z0\.s, p0/z, \[x0\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u32_base, svuint32_t, uint32_t, > + z0 = svld1ro_u32 (p0, x0), > + z0 = svld1ro (p0, x0)) > + > +/* > +** ld1ro_u32_index: > +** ld1row z0\.s, p0/z, \[x0, x1, lsl 2\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u32_index, svuint32_t, uint32_t, > + z0 = svld1ro_u32 (p0, x0 + x1), > + z0 = svld1ro (p0, x0 + x1)) > + > +/* > +** ld1ro_u32_1: > +** add (x[0-9]+), x0, #?4 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u32_1, svuint32_t, uint32_t, > + z0 = svld1ro_u32 (p0, x0 + 1), > + z0 = svld1ro (p0, x0 + 1)) > + > +/* > +** ld1ro_u32_4: > +** add (x[0-9]+), x0, #?16 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u32_4, svuint32_t, uint32_t, > + z0 = svld1ro_u32 (p0, x0 + 4), > + z0 = svld1ro (p0, x0 + 4)) > + > +/* > +** ld1ro_u32_64: > +** add (x[0-9]+), x0, #?256 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u32_64, svuint32_t, uint32_t, > + z0 = svld1ro_u32 (p0, x0 + 64), > + z0 = svld1ro (p0, x0 + 64)) > + > +/* > +** ld1ro_u32_m1: > +** sub (x[0-9]+), x0, #?4 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u32_m1, svuint32_t, uint32_t, > + z0 = svld1ro_u32 (p0, x0 - 1), > + z0 = svld1ro (p0, x0 - 1)) > + > +/* > +** ld1ro_u32_m4: > +** sub (x[0-9]+), x0, #?16 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u32_m4, svuint32_t, uint32_t, > + z0 = svld1ro_u32 (p0, x0 - 4), > + z0 = svld1ro (p0, x0 - 4)) > + > +/* > +** ld1ro_u32_m72: > +** sub (x[0-9]+), x0, #?288 > +** ld1row z0\.s, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u32_m72, svuint32_t, uint32_t, > + z0 = svld1ro_u32 (p0, x0 - 72), > + z0 = svld1ro (p0, x0 - 72)) > + > +/* > +** ld1ro_u32_8: > +** ld1row z0\.s, p0/z, \[x0, #?32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u32_8, svuint32_t, uint32_t, > + z0 = svld1ro_u32 (p0, x0 + 8), > + z0 = svld1ro (p0, x0 + 8)) > + > +/* > +** ld1ro_u32_56: > +** ld1row z0\.s, p0/z, \[x0, #?224\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u32_56, svuint32_t, uint32_t, > + z0 = svld1ro_u32 (p0, x0 + 56), > + z0 = svld1ro (p0, x0 + 56)) > + > +/* > +** ld1ro_u32_m8: > +** ld1row z0\.s, p0/z, \[x0, #?-32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u32_m8, svuint32_t, uint32_t, > + z0 = svld1ro_u32 (p0, x0 - 8), > + z0 = svld1ro (p0, x0 - 8)) > + > +/* > +** ld1ro_u32_m64: > +** ld1row z0\.s, p0/z, \[x0, #?-256\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u32_m64, svuint32_t, uint32_t, > + z0 = svld1ro_u32 (p0, x0 - 64), > + z0 = svld1ro (p0, x0 - 64)) > + > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c > b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..2194d52d5e3592e22d311be27573e254f16f2897 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c > @@ -0,0 +1,119 @@ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */ > + > +#include "test_sve_acle.h" > + > +/* > +** ld1ro_u64_base: > +** ld1rod z0\.d, p0/z, \[x0\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u64_base, svuint64_t, uint64_t, > + z0 = svld1ro_u64 (p0, x0), > + z0 = svld1ro (p0, x0)) > + > +/* > +** ld1ro_u64_index: > +** ld1rod z0\.d, p0/z, \[x0, x1, lsl 3\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u64_index, svuint64_t, uint64_t, > + z0 = svld1ro_u64 (p0, x0 + x1), > + z0 = svld1ro (p0, x0 + x1)) > + > +/* > +** ld1ro_u64_1: > +** 
add (x[0-9]+), x0, #?8 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u64_1, svuint64_t, uint64_t, > + z0 = svld1ro_u64 (p0, x0 + 1), > + z0 = svld1ro (p0, x0 + 1)) > + > +/* > +** ld1ro_u64_2: > +** add (x[0-9]+), x0, #?16 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u64_2, svuint64_t, uint64_t, > + z0 = svld1ro_u64 (p0, x0 + 2), > + z0 = svld1ro (p0, x0 + 2)) > + > +/* > +** ld1ro_u64_32: > +** add (x[0-9]+), x0, #?256 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u64_32, svuint64_t, uint64_t, > + z0 = svld1ro_u64 (p0, x0 + 32), > + z0 = svld1ro (p0, x0 + 32)) > + > +/* > +** ld1ro_u64_m1: > +** sub (x[0-9]+), x0, #?8 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u64_m1, svuint64_t, uint64_t, > + z0 = svld1ro_u64 (p0, x0 - 1), > + z0 = svld1ro (p0, x0 - 1)) > + > +/* > +** ld1ro_u64_m2: > +** sub (x[0-9]+), x0, #?16 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u64_m2, svuint64_t, uint64_t, > + z0 = svld1ro_u64 (p0, x0 - 2), > + z0 = svld1ro (p0, x0 - 2)) > + > +/* > +** ld1ro_u64_m36: > +** sub (x[0-9]+), x0, #?288 > +** ld1rod z0\.d, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u64_m36, svuint64_t, uint64_t, > + z0 = svld1ro_u64 (p0, x0 - 36), > + z0 = svld1ro (p0, x0 - 36)) > + > +/* > +** ld1ro_u64_4: > +** ld1rod z0\.d, p0/z, \[x0, #?32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u64_4, svuint64_t, uint64_t, > + z0 = svld1ro_u64 (p0, x0 + 4), > + z0 = svld1ro (p0, x0 + 4)) > + > +/* > +** ld1ro_u64_28: > +** ld1rod z0\.d, p0/z, \[x0, #?224\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u64_28, svuint64_t, uint64_t, > + z0 = svld1ro_u64 (p0, x0 + 28), > + z0 = svld1ro (p0, x0 + 28)) > + > +/* > +** ld1ro_u64_m4: > +** ld1rod z0\.d, p0/z, \[x0, #?-32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u64_m4, svuint64_t, uint64_t, > + z0 = svld1ro_u64 (p0, x0 - 4), > + z0 = svld1ro (p0, x0 - 4)) > + > +/* > +** ld1ro_u64_m32: > +** ld1rod z0\.d, p0/z, \[x0, #?-256\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u64_m32, svuint64_t, uint64_t, > + z0 = svld1ro_u64 (p0, x0 - 32), > + z0 = svld1ro (p0, x0 - 32)) > + > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c > b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..b98c0c7444f6c50d6c4e185a431a49040a267154 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c > @@ -0,0 +1,119 @@ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > +/* { dg-additional-options "-march=armv8.6-a+sve+f64mm" } */ > + > +#include "test_sve_acle.h" > + > +/* > +** ld1ro_u8_base: > +** ld1rob z0\.b, p0/z, \[x0\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u8_base, svuint8_t, uint8_t, > + z0 = svld1ro_u8 (p0, x0), > + z0 = svld1ro (p0, x0)) > + > +/* > +** ld1ro_u8_index: > +** ld1rob z0\.b, p0/z, \[x0, x1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u8_index, svuint8_t, uint8_t, > + z0 = svld1ro_u8 (p0, x0 + x1), > + z0 = svld1ro (p0, x0 + x1)) > + > +/* > +** ld1ro_u8_1: > +** add (x[0-9]+), x0, #?1 > +** ld1rob z0\.b, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u8_1, svuint8_t, uint8_t, > + z0 = svld1ro_u8 (p0, x0 + 1), > + z0 = svld1ro (p0, x0 + 1)) > + > +/* > +** ld1ro_u8_16: > +** add (x[0-9]+), x0, #?16 > +** ld1rob z0\.b, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u8_16, svuint8_t, uint8_t, > + z0 = svld1ro_u8 (p0, x0 + 16), > + z0 = svld1ro (p0, x0 + 16)) > + > +/* > +** ld1ro_u8_256: > +** add (x[0-9]+), x0, #?256 > +** ld1rob 
z0\.b, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u8_256, svuint8_t, uint8_t, > + z0 = svld1ro_u8 (p0, x0 + 256), > + z0 = svld1ro (p0, x0 + 256)) > + > +/* > +** ld1ro_u8_m1: > +** sub (x[0-9]+), x0, #?1 > +** ld1rob z0\.b, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u8_m1, svuint8_t, uint8_t, > + z0 = svld1ro_u8 (p0, x0 - 1), > + z0 = svld1ro (p0, x0 - 1)) > + > +/* > +** ld1ro_u8_m16: > +** sub (x[0-9]+), x0, #?16 > +** ld1rob z0\.b, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u8_m16, svuint8_t, uint8_t, > + z0 = svld1ro_u8 (p0, x0 - 16), > + z0 = svld1ro (p0, x0 - 16)) > + > +/* > +** ld1ro_u8_m288: > +** sub (x[0-9]+), x0, #?288 > +** ld1rob z0\.b, p0/z, \[\1\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u8_m288, svuint8_t, uint8_t, > + z0 = svld1ro_u8 (p0, x0 - 288), > + z0 = svld1ro (p0, x0 - 288)) > + > +/* > +** ld1ro_u8_32: > +** ld1rob z0\.b, p0/z, \[x0, #?32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u8_32, svuint8_t, uint8_t, > + z0 = svld1ro_u8 (p0, x0 + 32), > + z0 = svld1ro (p0, x0 + 32)) > + > +/* > +** ld1ro_u8_224: > +** ld1rob z0\.b, p0/z, \[x0, #?224\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u8_224, svuint8_t, uint8_t, > + z0 = svld1ro_u8 (p0, x0 + 224), > + z0 = svld1ro (p0, x0 + 224)) > + > +/* > +** ld1ro_u8_m32: > +** ld1rob z0\.b, p0/z, \[x0, #?-32\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u8_m32, svuint8_t, uint8_t, > + z0 = svld1ro_u8 (p0, x0 - 32), > + z0 = svld1ro (p0, x0 - 32)) > + > +/* > +** ld1ro_u8_m256: > +** ld1rob z0\.b, p0/z, \[x0, #?-256\] > +** ret > +*/ > +TEST_LOAD (ld1ro_u8_m256, svuint8_t, uint8_t, > + z0 = svld1ro_u8 (p0, x0 - 256), > + z0 = svld1ro (p0, x0 - 256)) > + >