https://github.com/albertbolt1 created https://github.com/llvm/llvm-project/pull/187516
Added vector intrinsics for vshlq_n_s8, vshlq_n_s16, vshlq_n_s32, vshlq_n_s64, vshlq_n_u8, vshlq_n_u16, vshlq_n_u32, vshlq_n_u64, vshl_n_s8, vshl_n_s16, vshl_n_s32, vshl_n_s64, vshl_n_u8, vshl_n_u16, vshl_n_u32, and vshl_n_u64. These cover all of the constant-shift-left vector intrinsics. The approach is as follows: 1) The quad-word vectors have the forms 64x2, 32x4, 16x8, and 8x16, while the shift amount is a scalar constant. Since shift-left requires both operands to be vectors, the constant shift is broadcast into a vector of the matching form (e.g. for a 64x2 operand the constant becomes a 64x2 vector) — I have learnt that this broadcast operation is called a **splat**. 2) After the splat, the LHS and RHS have the same vector type, so the shift left can be applied directly. 3) One remaining issue: ops[0] does not arrive with the right element type — for quad words it falls back to the function's default int8x16 — so it is converted with a bitcast; 8x16 and 64x2 are both 128 bits wide, so the bitcast reinterprets the vector into the required form without changing its data. Wrote test cases for all the intrinsics listed above. >From 33792d29d3b9e80876a1b3bfdd50b2612d493e5c Mon Sep 17 00:00:00 2001 From: albertbolt <[email protected]> Date: Thu, 19 Mar 2026 19:33:16 +0530 Subject: [PATCH] adding vector intrinsics for shift left --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 14 +- clang/test/CodeGen/AArch64/neon/intrinsics.c | 163 ++++++++++++++++++ 2 files changed, 175 insertions(+), 2 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index 5d7b8d839fa84..c9960be7b0b6b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -860,6 +860,7 @@ static mlir::Value emitCommonNeonBuiltinExpr( cir::VectorType vTy = getNeonType(&cgf, neonType, loc, hasLegalHalfType, false, allowBFloatArgsAndRet); mlir::Type ty = vTy; + if (!ty) return nullptr; @@ -1053,8 +1054,17 @@ static mlir::Value emitCommonNeonBuiltinExpr( case 
NEON::BI__builtin_neon_vsha512h2q_u64: case NEON::BI__builtin_neon_vsha512su0q_u64: case NEON::BI__builtin_neon_vsha512su1q_u64: + cgf.cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + ctx.BuiltinInfo.getName(builtinID)); + return mlir::Value{}; case NEON::BI__builtin_neon_vshl_n_v: - case NEON::BI__builtin_neon_vshlq_n_v: + case NEON::BI__builtin_neon_vshlq_n_v: { + auto rhsScalar = cgf.getBuilder().createIntCast(ops[1], vTy.getElementType()); + auto rhsVec = cir::VecSplatOp::create(cgf.getBuilder(),loc, vTy, rhsScalar); + auto lhsVec = cgf.getBuilder().createBitcast(ops[0], vTy); + return cgf.getBuilder().createShiftLeft(loc, lhsVec, rhsVec); + } case NEON::BI__builtin_neon_vshll_n_v: case NEON::BI__builtin_neon_vshrn_n_v: case NEON::BI__builtin_neon_vshr_n_v: @@ -2593,7 +2603,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, // defer to common code if it's been added to our special map. builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, builtinID, aarch64SIMDIntrinsicsProvenSorted); - + if (builtin) return emitCommonNeonBuiltinExpr( *this, builtin->builtinID, builtin->llvmIntrinsic, diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c index bf8e62feda8da..af2341659f437 100644 --- a/clang/test/CodeGen/AArch64/neon/intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c @@ -982,3 +982,166 @@ int64_t test_vshld_u64(int64_t a,int64_t b) { return (int64_t)vshld_u64(a, b); } +// ALL-LABEL: test_vshlq_n_s8 +int8x16_t test_vshlq_n_s8(int8x16_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<16 x !s8i>, %{{.*}} : !cir.vector<16 x !s8i>) -> !cir.vector<16 x !s8i> + + // LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <16 x i8> {{.*}}, splat (i8 1) + // LLVM: ret <16 x i8> [[SHL]] + return vshlq_n_s8(a, 1); +} + +// ALL-LABEL: 
test_vshlq_n_s16 +int16x8_t test_vshlq_n_s16(int16x8_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !s16i>, %{{.*}} : !cir.vector<8 x !s16i>) -> !cir.vector<8 x !s16i> + + // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <8 x i16> {{.*}}, splat (i16 1) + // LLVM: ret <8 x i16> [[SHL]] + return vshlq_n_s16(a, 1); +} + +// ALL-LABEL: test_vshlq_n_s32 +int32x4_t test_vshlq_n_s32(int32x4_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !s32i>, %{{.*}} : !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i> + + // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <4 x i32> {{.*}}, splat (i32 1) + // LLVM: ret <4 x i32> [[SHL]] + return vshlq_n_s32(a, 1); +} + +// ALL-LABEL: test_vshlq_n_s64 +int64x2_t test_vshlq_n_s64(int64x2_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !s64i>, %{{.*}} : !cir.vector<2 x !s64i>) -> !cir.vector<2 x !s64i> + + // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <2 x i64> {{.*}}, splat (i64 1) + // LLVM: ret <2 x i64> [[SHL]] + return vshlq_n_s64(a, 1); +} + +// ALL-LABEL: test_vshlq_n_u8 +uint8x16_t test_vshlq_n_u8(uint8x16_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<16 x !u8i>, %{{.*}} : !cir.vector<16 x !u8i>) -> !cir.vector<16 x !u8i> + + // LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <16 x i8> {{.*}}, splat (i8 1) + // LLVM: ret <16 x i8> [[SHL]] + return vshlq_n_u8(a, 1); +} + +// ALL-LABEL: test_vshlq_n_u16 +uint16x8_t test_vshlq_n_u16(uint16x8_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !u16i>, %{{.*}} : !cir.vector<8 x !u16i>) -> !cir.vector<8 x !u16i> + + // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], i64 
{{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <8 x i16> {{.*}}, splat (i16 1) + // LLVM: ret <8 x i16> [[SHL]] + return vshlq_n_u16(a, 1); +} + +// ALL-LABEL: test_vshlq_n_u32 +uint32x4_t test_vshlq_n_u32(uint32x4_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !u32i>, %{{.*}} : !cir.vector<4 x !u32i>) -> !cir.vector<4 x !u32i> + + // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <4 x i32> {{.*}}, splat (i32 1) + // LLVM: ret <4 x i32> [[SHL]] + return vshlq_n_u32(a, 1); +} + +// ALL-LABEL: test_vshlq_n_u64 +uint64x2_t test_vshlq_n_u64(uint64x2_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !u64i>, %{{.*}} : !cir.vector<2 x !u64i>) -> !cir.vector<2 x !u64i> + + // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <2 x i64> {{.*}}, splat (i64 1) + // LLVM: ret <2 x i64> [[SHL]] + return vshlq_n_u64(a, 1); +} + +// ALL-LABEL: test_vshl_n_s8 +int8x8_t test_vshl_n_s8(int8x8_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !s8i>, %{{.*}} : !cir.vector<8 x !s8i>) -> !cir.vector<8 x !s8i> + + // LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <8 x i8> {{.*}}, splat (i8 1) + // LLVM: ret <8 x i8> [[SHL]] + return vshl_n_s8(a, 1); +} + +// ALL-LABEL: test_vshl_n_s16 +int16x4_t test_vshl_n_s16(int16x4_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !s16i>, %{{.*}} : !cir.vector<4 x !s16i>) -> !cir.vector<4 x !s16i> + + // LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <4 x i16> {{.*}}, splat (i16 1) + // LLVM: ret <4 x i16> [[SHL]] + return vshl_n_s16(a, 1); +} + +// ALL-LABEL: test_vshl_n_s32 +int32x2_t test_vshl_n_s32(int32x2_t a, int64_t b) { + // CIR: [[RES:%.*]] = 
cir.shift(left, %{{.*}} : !cir.vector<2 x !s32i>, %{{.*}} : !cir.vector<2 x !s32i>) -> !cir.vector<2 x !s32i> + + // LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <2 x i32> {{.*}}, splat (i32 1) + // LLVM: ret <2 x i32> [[SHL]] + return vshl_n_s32(a, 1); +} + +// ALL-LABEL: test_vshl_n_s64 +int64x1_t test_vshl_n_s64(int64x1_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<1 x !s64i>, %{{.*}} : !cir.vector<1 x !s64i>) -> !cir.vector<1 x !s64i> + + // LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <1 x i64> {{.*}}, splat (i64 1) + // LLVM: ret <1 x i64> [[SHL]] + return vshl_n_s64(a, 1); +} + +// ALL-LABEL: test_vshl_n_u8 +uint8x8_t test_vshl_n_u8(uint8x8_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<8 x !u8i>, %{{.*}} : !cir.vector<8 x !u8i>) -> !cir.vector<8 x !u8i> + + // LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <8 x i8> {{.*}}, splat (i8 1) + // LLVM: ret <8 x i8> [[SHL]] + return vshl_n_u8(a, 1); +} + +// ALL-LABEL: test_vshl_n_u16 +uint16x4_t test_vshl_n_u16(uint16x4_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<4 x !u16i>, %{{.*}} : !cir.vector<4 x !u16i>) -> !cir.vector<4 x !u16i> + + + // LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <4 x i16> {{.*}}, splat (i16 1) + // LLVM: ret <4 x i16> [[SHL]] + return vshl_n_u16(a, 1); +} + +// ALL-LABEL: test_vshl_n_u32 +uint32x2_t test_vshl_n_u32(uint32x2_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<2 x !u32i>, %{{.*}} : !cir.vector<2 x !u32i>) -> !cir.vector<2 x !u32i> + + // LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <2 x i32> {{.*}}, splat (i32 1) + // LLVM: ret 
<2 x i32> [[SHL]] + return vshl_n_u32(a, 1); +} + +// ALL-LABEL: test_vshl_n_u64 +uint64x1_t test_vshl_n_u64(uint64x1_t a, int64_t b) { + // CIR: [[RES:%.*]] = cir.shift(left, %{{.*}} : !cir.vector<1 x !u64i>, %{{.*}} : !cir.vector<1 x !u64i>) -> !cir.vector<1 x !u64i> + + // LLVM-SAME: <1 x i64> {{.*}} [[A:%.*]], i64 {{.*}} [[B:%.*]]) #[[ATTR0:[0-9]+]] { + // LLVM: [[SHL:%.*]] = shl <1 x i64> {{.*}}, splat (i64 1) + // LLVM: ret <1 x i64> [[SHL]] + return vshl_n_u64(a, 1); +} + + + _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
