nickdesaulniers created this revision. nickdesaulniers added a reviewer: DavidSpickett. Herald added a subscriber: kristof.beyls. Herald added a project: All. nickdesaulniers requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
The generated arm_neon.h header isn't -Wdeclaration-after-statement compliant when targeting -mbig-endian. Update the generator to declare the return value, if any, first — before any other arguments that might need to be "reversed" from little-endian to big-endian. Link: https://github.com/ClangBuiltLinux/linux/issues/1603 Fixes: https://github.com/llvm/llvm-project/issues/54062 Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D122189 Files: clang/test/CodeGen/aarch64-neon-ldst-one.c clang/test/CodeGen/arm_neon_intrinsics.c clang/test/Sema/arm-neon-decl-after-stmt.c clang/utils/TableGen/NeonEmitter.cpp
Index: clang/utils/TableGen/NeonEmitter.cpp =================================================================== --- clang/utils/TableGen/NeonEmitter.cpp +++ clang/utils/TableGen/NeonEmitter.cpp @@ -502,6 +502,7 @@ void emitBody(StringRef CallPrefix); void emitShadowedArgs(); void emitArgumentReversal(); + void emitReturnVarDecl(); void emitReturnReversal(); void emitReverseVariable(Variable &Dest, Variable &Src); void emitNewLine(); @@ -1228,6 +1229,15 @@ } } +void Intrinsic::emitReturnVarDecl() { + assert(RetVar.getType() == Types[0]); + // Create a return variable, if we're not void. + if (!RetVar.getType().isVoid()) { + OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; + emitNewLine(); + } +} + void Intrinsic::emitReturnReversal() { if (isBigEndianSafe()) return; @@ -1353,13 +1363,6 @@ void Intrinsic::emitBody(StringRef CallPrefix) { std::vector<std::string> Lines; - assert(RetVar.getType() == Types[0]); - // Create a return variable, if we're not void. - if (!RetVar.getType().isVoid()) { - OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; - emitNewLine(); - } - if (!Body || Body->getValues().empty()) { // Nothing specific to output - must output a builtin. 
emitBodyAsBuiltinCall(); @@ -1849,6 +1852,7 @@ OS << " __attribute__((unavailable));"; } else { emitOpeningBrace(); + emitReturnVarDecl(); emitShadowedArgs(); if (ReverseArguments) emitArgumentReversal(); @@ -1867,6 +1871,7 @@ CurrentRecord = R; initVariables(); + emitReturnVarDecl(); emitBody(""); OS.str(""); Index: clang/test/Sema/arm-neon-decl-after-stmt.c =================================================================== --- /dev/null +++ clang/test/Sema/arm-neon-decl-after-stmt.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -triple armebv7-linux-gnueabihf -target-feature +neon \ +// RUN: -Wdeclaration-after-statement -fsyntax-only -verify %s +// REQUIRES: aarch64-registered-target || arm-registered-target +// https://github.com/llvm/llvm-project/issues/54062 +#include <arm_neon.h> + +uint8x16_t a; + +uint8x16_t x(void) { + return vshrq_n_u8(a, 8); +} +// expected-no-diagnostics Index: clang/test/CodeGen/arm_neon_intrinsics.c =================================================================== --- clang/test/CodeGen/arm_neon_intrinsics.c +++ clang/test/CodeGen/arm_neon_intrinsics.c @@ -12145,8 +12145,8 @@ // CHECK-LABEL: @test_vld2q_lane_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT16X8X2_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X8X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT16X8X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X8X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT16X8X2_T]], %struct.uint16x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 16 @@ -12177,8 +12177,8 @@ // CHECK-LABEL: @test_vld2q_lane_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT32X4X2_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X4X2_T]], align 16 // 
CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT32X4X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X4X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT32X4X2_T]], %struct.uint32x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 16 @@ -12209,8 +12209,8 @@ // CHECK-LABEL: @test_vld2q_lane_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT16X8X2_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X8X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT16X8X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X8X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT16X8X2_T]], %struct.int16x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 16 @@ -12241,8 +12241,8 @@ // CHECK-LABEL: @test_vld2q_lane_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT32X4X2_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X4X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT32X4X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X4X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT32X4X2_T]], %struct.int32x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 16 @@ -12273,8 +12273,8 @@ // CHECK-LABEL: @test_vld2q_lane_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 // CHECK-NEXT: 
[[__RET:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], %struct.float16x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 16 @@ -12305,8 +12305,8 @@ // CHECK-LABEL: @test_vld2q_lane_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT32X4X2_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X4X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT32X4X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X4X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT32X4X2_T]], %struct.float32x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 16 @@ -12337,8 +12337,8 @@ // CHECK-LABEL: @test_vld2q_lane_p16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY16X8X2_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X8X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY16X8X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X8X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY16X8X2_T]], %struct.poly16x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 16 @@ -12369,8 +12369,8 @@ // CHECK-LABEL: @test_vld2_lane_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8 // CHECK-NEXT: 
[[__RET:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* // CHECK-NEXT: store [2 x i64] [[B_COERCE:%.*]], [2 x i64]* [[TMP0]], align 8 @@ -12396,8 +12396,8 @@ // CHECK-LABEL: @test_vld2_lane_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT16X4X2_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X4X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT16X4X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X4X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT16X4X2_T]], %struct.uint16x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]* // CHECK-NEXT: store [2 x i64] [[B_COERCE:%.*]], [2 x i64]* [[TMP0]], align 8 @@ -12428,8 +12428,8 @@ // CHECK-LABEL: @test_vld2_lane_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT32X2X2_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X2X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT32X2X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X2X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT32X2X2_T]], %struct.uint32x2x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]* // CHECK-NEXT: store [2 x i64] [[B_COERCE:%.*]], [2 x i64]* [[TMP0]], align 8 @@ -12460,8 +12460,8 @@ // CHECK-LABEL: @test_vld2_lane_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca 
[[STRUCT_INT8X8X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* // CHECK-NEXT: store [2 x i64] [[B_COERCE:%.*]], [2 x i64]* [[TMP0]], align 8 @@ -12487,8 +12487,8 @@ // CHECK-LABEL: @test_vld2_lane_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT16X4X2_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X4X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT16X4X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X4X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT16X4X2_T]], %struct.int16x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]* // CHECK-NEXT: store [2 x i64] [[B_COERCE:%.*]], [2 x i64]* [[TMP0]], align 8 @@ -12519,8 +12519,8 @@ // CHECK-LABEL: @test_vld2_lane_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT32X2X2_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X2X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT32X2X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X2X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT32X2X2_T]], %struct.int32x2x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]* // CHECK-NEXT: store [2 x i64] [[B_COERCE:%.*]], [2 x i64]* [[TMP0]], align 8 @@ -12551,8 +12551,8 @@ // CHECK-LABEL: @test_vld2_lane_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 +// 
CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], %struct.float16x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]* // CHECK-NEXT: store [2 x i64] [[B_COERCE:%.*]], [2 x i64]* [[TMP0]], align 8 @@ -12583,8 +12583,8 @@ // CHECK-LABEL: @test_vld2_lane_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT32X2X2_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X2X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT32X2X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X2X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT32X2X2_T]], %struct.float32x2x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]* // CHECK-NEXT: store [2 x i64] [[B_COERCE:%.*]], [2 x i64]* [[TMP0]], align 8 @@ -12615,8 +12615,8 @@ // CHECK-LABEL: @test_vld2_lane_p8( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* // CHECK-NEXT: store [2 x i64] [[B_COERCE:%.*]], [2 x i64]* [[TMP0]], align 8 @@ -12642,8 +12642,8 @@ // CHECK-LABEL: @test_vld2_lane_p16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY16X4X2_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X4X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY16X4X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] 
= alloca [[STRUCT_POLY16X4X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY16X4X2_T]], %struct.poly16x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]* // CHECK-NEXT: store [2 x i64] [[B_COERCE:%.*]], [2 x i64]* [[TMP0]], align 8 @@ -12976,8 +12976,8 @@ // CHECK-LABEL: @test_vld3q_lane_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT16X8X3_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X8X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT16X8X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X8X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT16X8X3_T]], %struct.uint16x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* // CHECK-NEXT: store [6 x i64] [[B_COERCE:%.*]], [6 x i64]* [[TMP0]], align 16 @@ -13013,8 +13013,8 @@ // CHECK-LABEL: @test_vld3q_lane_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT32X4X3_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X4X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT32X4X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X4X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT32X4X3_T]], %struct.uint32x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]* // CHECK-NEXT: store [6 x i64] [[B_COERCE:%.*]], [6 x i64]* [[TMP0]], align 16 @@ -13050,8 +13050,8 @@ // CHECK-LABEL: @test_vld3q_lane_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT16X8X3_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X8X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT16X8X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca 
[[STRUCT_INT16X8X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT16X8X3_T]], %struct.int16x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* // CHECK-NEXT: store [6 x i64] [[B_COERCE:%.*]], [6 x i64]* [[TMP0]], align 16 @@ -13087,8 +13087,8 @@ // CHECK-LABEL: @test_vld3q_lane_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT32X4X3_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X4X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT32X4X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X4X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT32X4X3_T]], %struct.int32x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]* // CHECK-NEXT: store [6 x i64] [[B_COERCE:%.*]], [6 x i64]* [[TMP0]], align 16 @@ -13124,8 +13124,8 @@ // CHECK-LABEL: @test_vld3q_lane_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT16X8X3_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X8X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT16X8X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X8X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X3_T]], %struct.float16x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]* // CHECK-NEXT: store [6 x i64] [[B_COERCE:%.*]], [6 x i64]* [[TMP0]], align 16 @@ -13161,8 +13161,8 @@ // CHECK-LABEL: @test_vld3q_lane_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT32X4X3_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X4X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT32X4X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca 
[[STRUCT_FLOAT32X4X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT32X4X3_T]], %struct.float32x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]* // CHECK-NEXT: store [6 x i64] [[B_COERCE:%.*]], [6 x i64]* [[TMP0]], align 16 @@ -13198,8 +13198,8 @@ // CHECK-LABEL: @test_vld3q_lane_p16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY16X8X3_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X8X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY16X8X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X8X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY16X8X3_T]], %struct.poly16x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* // CHECK-NEXT: store [6 x i64] [[B_COERCE:%.*]], [6 x i64]* [[TMP0]], align 16 @@ -13235,8 +13235,8 @@ // CHECK-LABEL: @test_vld3_lane_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* // CHECK-NEXT: store [3 x i64] [[B_COERCE:%.*]], [3 x i64]* [[TMP0]], align 8 @@ -13265,8 +13265,8 @@ // CHECK-LABEL: @test_vld3_lane_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT16X4X3_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X4X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT16X4X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca 
[[STRUCT_UINT16X4X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT16X4X3_T]], %struct.uint16x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* // CHECK-NEXT: store [3 x i64] [[B_COERCE:%.*]], [3 x i64]* [[TMP0]], align 8 @@ -13302,8 +13302,8 @@ // CHECK-LABEL: @test_vld3_lane_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT32X2X3_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X2X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT32X2X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X2X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT32X2X3_T]], %struct.uint32x2x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]* // CHECK-NEXT: store [3 x i64] [[B_COERCE:%.*]], [3 x i64]* [[TMP0]], align 8 @@ -13339,8 +13339,8 @@ // CHECK-LABEL: @test_vld3_lane_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* // CHECK-NEXT: store [3 x i64] [[B_COERCE:%.*]], [3 x i64]* [[TMP0]], align 8 @@ -13369,8 +13369,8 @@ // CHECK-LABEL: @test_vld3_lane_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT16X4X3_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X4X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT16X4X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X4X3_T]], align 8 // 
CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT16X4X3_T]], %struct.int16x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* // CHECK-NEXT: store [3 x i64] [[B_COERCE:%.*]], [3 x i64]* [[TMP0]], align 8 @@ -13406,8 +13406,8 @@ // CHECK-LABEL: @test_vld3_lane_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT32X2X3_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X2X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT32X2X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X2X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT32X2X3_T]], %struct.int32x2x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]* // CHECK-NEXT: store [3 x i64] [[B_COERCE:%.*]], [3 x i64]* [[TMP0]], align 8 @@ -13443,8 +13443,8 @@ // CHECK-LABEL: @test_vld3_lane_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT16X4X3_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X4X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT16X4X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X4X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X3_T]], %struct.float16x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]* // CHECK-NEXT: store [3 x i64] [[B_COERCE:%.*]], [3 x i64]* [[TMP0]], align 8 @@ -13480,8 +13480,8 @@ // CHECK-LABEL: @test_vld3_lane_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT32X2X3_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X2X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT32X2X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X2X3_T]], align 8 // CHECK-NEXT: 
[[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT32X2X3_T]], %struct.float32x2x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]* // CHECK-NEXT: store [3 x i64] [[B_COERCE:%.*]], [3 x i64]* [[TMP0]], align 8 @@ -13517,8 +13517,8 @@ // CHECK-LABEL: @test_vld3_lane_p8( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* // CHECK-NEXT: store [3 x i64] [[B_COERCE:%.*]], [3 x i64]* [[TMP0]], align 8 @@ -13547,8 +13547,8 @@ // CHECK-LABEL: @test_vld3_lane_p16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY16X4X3_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X4X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY16X4X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X4X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY16X4X3_T]], %struct.poly16x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* // CHECK-NEXT: store [3 x i64] [[B_COERCE:%.*]], [3 x i64]* [[TMP0]], align 8 @@ -13886,8 +13886,8 @@ // CHECK-LABEL: @test_vld4q_lane_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT16X8X4_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X8X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT16X8X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X8X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = 
getelementptr inbounds [[STRUCT_UINT16X8X4_T]], %struct.uint16x8x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* // CHECK-NEXT: store [8 x i64] [[B_COERCE:%.*]], [8 x i64]* [[TMP0]], align 16 @@ -13928,8 +13928,8 @@ // CHECK-LABEL: @test_vld4q_lane_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT32X4X4_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X4X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT32X4X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X4X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT32X4X4_T]], %struct.uint32x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]* // CHECK-NEXT: store [8 x i64] [[B_COERCE:%.*]], [8 x i64]* [[TMP0]], align 16 @@ -13970,8 +13970,8 @@ // CHECK-LABEL: @test_vld4q_lane_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT16X8X4_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X8X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT16X8X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X8X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT16X8X4_T]], %struct.int16x8x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* // CHECK-NEXT: store [8 x i64] [[B_COERCE:%.*]], [8 x i64]* [[TMP0]], align 16 @@ -14012,8 +14012,8 @@ // CHECK-LABEL: @test_vld4q_lane_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT32X4X4_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X4X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT32X4X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X4X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr 
inbounds [[STRUCT_INT32X4X4_T]], %struct.int32x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]* // CHECK-NEXT: store [8 x i64] [[B_COERCE:%.*]], [8 x i64]* [[TMP0]], align 16 @@ -14054,8 +14054,8 @@ // CHECK-LABEL: @test_vld4q_lane_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT16X8X4_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X8X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT16X8X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X8X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X4_T]], %struct.float16x8x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]* // CHECK-NEXT: store [8 x i64] [[B_COERCE:%.*]], [8 x i64]* [[TMP0]], align 16 @@ -14096,8 +14096,8 @@ // CHECK-LABEL: @test_vld4q_lane_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT32X4X4_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X4X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT32X4X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X4X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT32X4X4_T]], %struct.float32x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]* // CHECK-NEXT: store [8 x i64] [[B_COERCE:%.*]], [8 x i64]* [[TMP0]], align 16 @@ -14138,8 +14138,8 @@ // CHECK-LABEL: @test_vld4q_lane_p16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY16X8X4_T:%.*]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X8X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY16X8X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X8X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = 
getelementptr inbounds [[STRUCT_POLY16X8X4_T]], %struct.poly16x8x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* // CHECK-NEXT: store [8 x i64] [[B_COERCE:%.*]], [8 x i64]* [[TMP0]], align 16 @@ -14180,8 +14180,8 @@ // CHECK-LABEL: @test_vld4_lane_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 8 @@ -14213,8 +14213,8 @@ // CHECK-LABEL: @test_vld4_lane_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT16X4X4_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X4X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT16X4X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X4X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT16X4X4_T]], %struct.uint16x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 8 @@ -14255,8 +14255,8 @@ // CHECK-LABEL: @test_vld4_lane_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT32X2X4_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X2X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT32X2X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X2X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds 
[[STRUCT_UINT32X2X4_T]], %struct.uint32x2x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 8 @@ -14297,8 +14297,8 @@ // CHECK-LABEL: @test_vld4_lane_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 8 @@ -14330,8 +14330,8 @@ // CHECK-LABEL: @test_vld4_lane_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT16X4X4_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X4X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT16X4X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X4X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT16X4X4_T]], %struct.int16x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 8 @@ -14372,8 +14372,8 @@ // CHECK-LABEL: @test_vld4_lane_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT32X2X4_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X2X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT32X2X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X2X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT32X2X4_T]], %struct.int32x2x4_t* [[B]], 
i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 8 @@ -14414,8 +14414,8 @@ // CHECK-LABEL: @test_vld4_lane_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT16X4X4_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X4X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT16X4X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X4X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X4_T]], %struct.float16x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 8 @@ -14456,8 +14456,8 @@ // CHECK-LABEL: @test_vld4_lane_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT32X2X4_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X2X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT32X2X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X2X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT32X2X4_T]], %struct.float32x2x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 8 @@ -14498,8 +14498,8 @@ // CHECK-LABEL: @test_vld4_lane_p8( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[B]], i32 0, i32 0 // 
CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 8 @@ -14531,8 +14531,8 @@ // CHECK-LABEL: @test_vld4_lane_p16( // CHECK-NEXT: entry: // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY16X4X4_T:%.*]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X4X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY16X4X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X4X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY16X4X4_T]], %struct.poly16x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]* // CHECK-NEXT: store [4 x i64] [[B_COERCE:%.*]], [4 x i64]* [[TMP0]], align 8 Index: clang/test/CodeGen/aarch64-neon-ldst-one.c =================================================================== --- clang/test/CodeGen/aarch64-neon-ldst-one.c +++ clang/test/CodeGen/aarch64-neon-ldst-one.c @@ -1097,8 +1097,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[SRC]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[SRC_COERCE:%.*]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* @@ -1128,8 +1128,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16 // 
CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[SRC]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[SRC_COERCE:%.*]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* @@ -1159,8 +1159,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16 // CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[SRC]], i32 0, i32 0 // CHECK-NEXT: store [2 x <16 x i8>] [[SRC_COERCE:%.*]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* @@ -1190,8 +1190,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[SRC]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[SRC_COERCE:%.*]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8* @@ -1224,8 +1224,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16 // CHECK-NEXT: 
[[SRC:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[SRC]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[SRC_COERCE:%.*]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* @@ -1258,8 +1258,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT16X8X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT16X8X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X8X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT16X8X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X8X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT16X8X2_T]], %struct.uint16x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <8 x i16>] [[B_COERCE:%.*]], [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* @@ -1294,8 +1294,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT32X4X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT32X4X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X4X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT32X4X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X4X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT32X4X2_T]], %struct.uint32x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <4 x i32>] [[B_COERCE:%.*]], [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* @@ -1330,8 
+1330,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT64X2X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT64X2X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT64X2X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT64X2X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT64X2X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT64X2X2_T]], %struct.uint64x2x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <2 x i64>] [[B_COERCE:%.*]], [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8* @@ -1366,8 +1366,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT16X8X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT16X8X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X8X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT16X8X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X8X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT16X8X2_T]], %struct.int16x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <8 x i16>] [[B_COERCE:%.*]], [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* @@ -1402,8 +1402,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT32X4X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT32X4X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X4X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT32X4X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X4X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT32X4X2_T]], %struct.int32x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <4 x i32>] [[B_COERCE:%.*]], [2 x <4 x i32>]* 
[[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* @@ -1438,8 +1438,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT64X2X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT64X2X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT64X2X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT64X2X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT64X2X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT64X2X2_T]], %struct.int64x2x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <2 x i64>] [[B_COERCE:%.*]], [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8* @@ -1474,8 +1474,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], %struct.float16x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <8 x half>] [[B_COERCE:%.*]], [2 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* @@ -1510,8 +1510,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT32X4X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT32X4X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X4X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT32X4X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X4X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds 
[[STRUCT_FLOAT32X4X2_T]], %struct.float32x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <4 x float>] [[B_COERCE:%.*]], [2 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* @@ -1546,8 +1546,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT64X2X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT64X2X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT64X2X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT64X2X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT64X2X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT64X2X2_T]], %struct.float64x2x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <2 x double>] [[B_COERCE:%.*]], [2 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8* @@ -1582,8 +1582,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY16X8X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY16X8X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X8X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY16X8X2_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X8X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY16X8X2_T]], %struct.poly16x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <8 x i16>] [[B_COERCE:%.*]], [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* @@ -1618,8 +1618,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY64X2X2_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY64X2X2_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY64X2X2_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY64X2X2_T]], 
align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY64X2X2_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY64X2X2_T]], %struct.poly64x2x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <2 x i64>] [[B_COERCE:%.*]], [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8* @@ -1654,8 +1654,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE:%.*]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* @@ -1685,8 +1685,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT16X4X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT16X4X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X4X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT16X4X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X4X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT16X4X2_T]], %struct.uint16x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <4 x i16>] [[B_COERCE:%.*]], [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* @@ -1721,8 +1721,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT32X2X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT32X2X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca 
[[STRUCT_UINT32X2X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT32X2X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X2X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT32X2X2_T]], %struct.uint32x2x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <2 x i32>] [[B_COERCE:%.*]], [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* @@ -1757,8 +1757,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT64X1X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT64X1X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT64X1X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT64X1X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT64X1X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT64X1X2_T]], %struct.uint64x1x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <1 x i64>] [[B_COERCE:%.*]], [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* @@ -1793,8 +1793,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE:%.*]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* @@ -1824,8 +1824,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT16X4X2_T:%.*]], align 8 // CHECK-NEXT: 
[[B:%.*]] = alloca [[STRUCT_INT16X4X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X4X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT16X4X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X4X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT16X4X2_T]], %struct.int16x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <4 x i16>] [[B_COERCE:%.*]], [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* @@ -1860,8 +1860,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT32X2X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT32X2X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X2X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT32X2X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X2X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT32X2X2_T]], %struct.int32x2x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <2 x i32>] [[B_COERCE:%.*]], [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* @@ -1896,8 +1896,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT64X1X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT64X1X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT64X1X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT64X1X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT64X1X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT64X1X2_T]], %struct.int64x1x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <1 x i64>] [[B_COERCE:%.*]], [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* @@ -1932,8 +1932,8 @@ // CHECK-NEXT: entry: // 
CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], %struct.float16x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <4 x half>] [[B_COERCE:%.*]], [2 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* @@ -1968,8 +1968,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT32X2X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT32X2X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X2X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT32X2X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X2X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT32X2X2_T]], %struct.float32x2x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <2 x float>] [[B_COERCE:%.*]], [2 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* @@ -2004,8 +2004,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT64X1X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT64X1X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT64X1X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT64X1X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT64X1X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT64X1X2_T]], %struct.float64x1x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <1 x double>] [[B_COERCE:%.*]], [2 x <1 x double>]* 
[[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8* @@ -2040,8 +2040,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE:%.*]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* @@ -2071,8 +2071,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY16X4X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY16X4X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X4X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY16X4X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X4X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY16X4X2_T]], %struct.poly16x4x2_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <4 x i16>] [[B_COERCE:%.*]], [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* @@ -2107,8 +2107,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY64X1X2_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY64X1X2_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY64X1X2_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY64X1X2_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY64X1X2_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY64X1X2_T]], %struct.poly64x1x2_t* 
[[B]], i32 0, i32 0 // CHECK-NEXT: store [2 x <1 x i64>] [[B_COERCE:%.*]], [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8* @@ -2143,8 +2143,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT16X8X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT16X8X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X8X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT16X8X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X8X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT16X8X3_T]], %struct.uint16x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <8 x i16>] [[B_COERCE:%.*]], [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* @@ -2184,8 +2184,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT32X4X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT32X4X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X4X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT32X4X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X4X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT32X4X3_T]], %struct.uint32x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <4 x i32>] [[B_COERCE:%.*]], [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* @@ -2225,8 +2225,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT64X2X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT64X2X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT64X2X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT64X2X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca 
[[STRUCT_UINT64X2X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT64X2X3_T]], %struct.uint64x2x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <2 x i64>] [[B_COERCE:%.*]], [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8* @@ -2266,8 +2266,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT16X8X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT16X8X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X8X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT16X8X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X8X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT16X8X3_T]], %struct.int16x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <8 x i16>] [[B_COERCE:%.*]], [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* @@ -2307,8 +2307,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT32X4X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT32X4X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X4X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT32X4X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X4X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT32X4X3_T]], %struct.int32x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <4 x i32>] [[B_COERCE:%.*]], [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* @@ -2348,8 +2348,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT64X2X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT64X2X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT64X2X3_T]], align 16 // 
CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT64X2X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT64X2X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT64X2X3_T]], %struct.int64x2x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <2 x i64>] [[B_COERCE:%.*]], [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8* @@ -2389,8 +2389,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT16X8X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X8X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT16X8X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X8X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X3_T]], %struct.float16x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <8 x half>] [[B_COERCE:%.*]], [3 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* @@ -2430,8 +2430,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT32X4X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT32X4X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X4X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT32X4X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X4X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT32X4X3_T]], %struct.float32x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <4 x float>] [[B_COERCE:%.*]], [3 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* @@ -2471,8 +2471,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT64X2X3_T:%.*]], align 16 // 
CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT64X2X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT64X2X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT64X2X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT64X2X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT64X2X3_T]], %struct.float64x2x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <2 x double>] [[B_COERCE:%.*]], [3 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8* @@ -2512,8 +2512,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE:%.*]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8* @@ -2546,8 +2546,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY16X8X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY16X8X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X8X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY16X8X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X8X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY16X8X3_T]], %struct.poly16x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <8 x i16>] [[B_COERCE:%.*]], [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] 
to i8* @@ -2587,8 +2587,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY64X2X3_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY64X2X3_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY64X2X3_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY64X2X3_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY64X2X3_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY64X2X3_T]], %struct.poly64x2x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <2 x i64>] [[B_COERCE:%.*]], [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8* @@ -2628,8 +2628,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE:%.*]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* @@ -2662,8 +2662,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT16X4X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT16X4X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X4X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT16X4X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X4X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT16X4X3_T]], %struct.uint16x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <4 x i16>] [[B_COERCE:%.*]], [3 x <4 x 
i16>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* @@ -2703,8 +2703,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT32X2X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT32X2X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X2X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT32X2X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X2X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT32X2X3_T]], %struct.uint32x2x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <2 x i32>] [[B_COERCE:%.*]], [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* @@ -2744,8 +2744,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT64X1X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT64X1X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT64X1X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT64X1X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT64X1X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT64X1X3_T]], %struct.uint64x1x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <1 x i64>] [[B_COERCE:%.*]], [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* @@ -2785,8 +2785,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], 
%struct.int8x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE:%.*]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* @@ -2819,8 +2819,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT16X4X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT16X4X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X4X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT16X4X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X4X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT16X4X3_T]], %struct.int16x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <4 x i16>] [[B_COERCE:%.*]], [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* @@ -2860,8 +2860,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT32X2X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT32X2X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X2X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT32X2X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X2X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT32X2X3_T]], %struct.int32x2x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <2 x i32>] [[B_COERCE:%.*]], [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* @@ -2901,8 +2901,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT64X1X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT64X1X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT64X1X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT64X1X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT64X1X3_T]], align 8 // 
CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT64X1X3_T]], %struct.int64x1x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <1 x i64>] [[B_COERCE:%.*]], [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* @@ -2942,8 +2942,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT16X4X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X4X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT16X4X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X4X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X3_T]], %struct.float16x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <4 x half>] [[B_COERCE:%.*]], [3 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* @@ -2983,8 +2983,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT32X2X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT32X2X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X2X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT32X2X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X2X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT32X2X3_T]], %struct.float32x2x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <2 x float>] [[B_COERCE:%.*]], [3 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* @@ -3024,8 +3024,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT64X1X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT64X1X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT64X1X3_T]], align 8 // CHECK-NEXT: 
[[__RET:%.*]] = alloca [[STRUCT_FLOAT64X1X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT64X1X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT64X1X3_T]], %struct.float64x1x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <1 x double>] [[B_COERCE:%.*]], [3 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8* @@ -3065,8 +3065,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE:%.*]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* @@ -3099,8 +3099,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY16X4X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY16X4X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X4X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY16X4X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X4X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY16X4X3_T]], %struct.poly16x4x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <4 x i16>] [[B_COERCE:%.*]], [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* @@ -3140,8 +3140,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY64X1X3_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca 
[[STRUCT_POLY64X1X3_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY64X1X3_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY64X1X3_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY64X1X3_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY64X1X3_T]], %struct.poly64x1x3_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [3 x <1 x i64>] [[B_COERCE:%.*]], [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8* @@ -3181,8 +3181,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE:%.*]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* @@ -3218,8 +3218,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT16X8X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT16X8X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X8X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT16X8X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X8X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT16X8X4_T]], %struct.uint16x8x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <8 x i16>] [[B_COERCE:%.*]], [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* @@ -3264,8 +3264,8 @@ // CHECK-NEXT: 
entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT32X4X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT32X4X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X4X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT32X4X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X4X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT32X4X4_T]], %struct.uint32x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <4 x i32>] [[B_COERCE:%.*]], [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* @@ -3310,8 +3310,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT64X2X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT64X2X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT64X2X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT64X2X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT64X2X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT64X2X4_T]], %struct.uint64x2x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <2 x i64>] [[B_COERCE:%.*]], [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8* @@ -3356,8 +3356,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE:%.*]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 
// CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* @@ -3393,8 +3393,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT16X8X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT16X8X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X8X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT16X8X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X8X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT16X8X4_T]], %struct.int16x8x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <8 x i16>] [[B_COERCE:%.*]], [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* @@ -3439,8 +3439,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT32X4X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT32X4X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X4X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT32X4X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X4X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT32X4X4_T]], %struct.int32x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <4 x i32>] [[B_COERCE:%.*]], [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* @@ -3485,8 +3485,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT64X2X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT64X2X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT64X2X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT64X2X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT64X2X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT64X2X4_T]], %struct.int64x2x4_t* [[B]], i32 0, i32 0 // 
CHECK-NEXT: store [4 x <2 x i64>] [[B_COERCE:%.*]], [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8* @@ -3531,8 +3531,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT16X8X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X8X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT16X8X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X8X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X4_T]], %struct.float16x8x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <8 x half>] [[B_COERCE:%.*]], [4 x <8 x half>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* @@ -3577,8 +3577,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT32X4X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT32X4X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X4X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT32X4X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X4X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT32X4X4_T]], %struct.float32x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <4 x float>] [[B_COERCE:%.*]], [4 x <4 x float>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* @@ -3623,8 +3623,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT64X2X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT64X2X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT64X2X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT64X2X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca 
[[STRUCT_FLOAT64X2X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT64X2X4_T]], %struct.float64x2x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <2 x double>] [[B_COERCE:%.*]], [4 x <2 x double>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8* @@ -3669,8 +3669,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE:%.*]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* @@ -3706,8 +3706,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY16X8X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY16X8X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X8X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY16X8X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X8X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY16X8X4_T]], %struct.poly16x8x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <8 x i16>] [[B_COERCE:%.*]], [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* @@ -3752,8 +3752,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY64X2X4_T:%.*]], align 16 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY64X2X4_T]], align 16 -// CHECK-NEXT: [[__S1:%.*]] = alloca 
[[STRUCT_POLY64X2X4_T]], align 16 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY64X2X4_T]], align 16 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY64X2X4_T]], align 16 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY64X2X4_T]], %struct.poly64x2x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <2 x i64>] [[B_COERCE:%.*]], [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8* @@ -3798,8 +3798,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE:%.*]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* @@ -3835,8 +3835,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT16X4X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT16X4X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X4X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT16X4X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT16X4X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT16X4X4_T]], %struct.uint16x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <4 x i16>] [[B_COERCE:%.*]], [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* @@ -3881,8 +3881,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT32X2X4_T:%.*]], align 8 // 
CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT32X2X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X2X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT32X2X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT32X2X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT32X2X4_T]], %struct.uint32x2x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <2 x i32>] [[B_COERCE:%.*]], [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* @@ -3927,8 +3927,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_UINT64X1X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT64X1X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT64X1X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_UINT64X1X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_UINT64X1X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT64X1X4_T]], %struct.uint64x1x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <1 x i64>] [[B_COERCE:%.*]], [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* @@ -3973,8 +3973,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE:%.*]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* @@ -4010,8 +4010,8 @@ // CHECK-NEXT: 
entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT16X4X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT16X4X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X4X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT16X4X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT16X4X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT16X4X4_T]], %struct.int16x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <4 x i16>] [[B_COERCE:%.*]], [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* @@ -4056,8 +4056,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT32X2X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT32X2X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X2X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT32X2X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT32X2X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT32X2X4_T]], %struct.int32x2x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <2 x i32>] [[B_COERCE:%.*]], [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* @@ -4102,8 +4102,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT64X1X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT64X1X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT64X1X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_INT64X1X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_INT64X1X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT64X1X4_T]], %struct.int64x1x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <1 x i64>] [[B_COERCE:%.*]], [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = 
bitcast %struct.int64x1x4_t* [[__S1]] to i8* @@ -4148,8 +4148,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT16X4X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X4X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT16X4X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT16X4X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X4_T]], %struct.float16x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <4 x half>] [[B_COERCE:%.*]], [4 x <4 x half>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* @@ -4194,8 +4194,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT32X2X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT32X2X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X2X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT32X2X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT32X2X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT32X2X4_T]], %struct.float32x2x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <2 x float>] [[B_COERCE:%.*]], [4 x <2 x float>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* @@ -4240,8 +4240,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT64X1X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_FLOAT64X1X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT64X1X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_FLOAT64X1X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_FLOAT64X1X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_FLOAT64X1X4_T]], %struct.float64x1x4_t* [[B]], i32 0, i32 
0 // CHECK-NEXT: store [4 x <1 x double>] [[B_COERCE:%.*]], [4 x <1 x double>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8* @@ -4286,8 +4286,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE:%.*]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* @@ -4323,8 +4323,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY16X4X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY16X4X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X4X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY16X4X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY16X4X4_T]], align 8 // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY16X4X4_T]], %struct.poly16x4x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <4 x i16>] [[B_COERCE:%.*]], [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* @@ -4369,8 +4369,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_POLY64X1X4_T:%.*]], align 8 // CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY64X1X4_T]], align 8 -// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY64X1X4_T]], align 8 // CHECK-NEXT: [[__RET:%.*]] = alloca [[STRUCT_POLY64X1X4_T]], align 8 +// CHECK-NEXT: [[__S1:%.*]] = alloca [[STRUCT_POLY64X1X4_T]], align 8 // CHECK-NEXT: 
[[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY64X1X4_T]], %struct.poly64x1x4_t* [[B]], i32 0, i32 0 // CHECK-NEXT: store [4 x <1 x i64>] [[B_COERCE:%.*]], [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits