RKSimon created this revision.
RKSimon added reviewers: craig.topper, spatel, rnk, thakis.
Herald added a subscriber: pengfei.
Herald added a project: clang.
RKSimon requested review of this revision.
Now that funnel shift handling is pretty good, we can lower these builtins directly to the llvm.fshl/llvm.fshr intrinsics and avoid a lot of zext/trunc issues around i128. https://godbolt.org/z/YqhnnM

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D89405

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/X86/ms-x86-intrinsics.c


Index: clang/test/CodeGen/X86/ms-x86-intrinsics.c
===================================================================
--- clang/test/CodeGen/X86/ms-x86-intrinsics.c
+++ clang/test/CodeGen/X86/ms-x86-intrinsics.c
@@ -144,14 +144,8 @@
   return __shiftleft128(l, h, d);
 }
 // CHECK-X64-LABEL: define dso_local i64 @test__shiftleft128(i64 %l, i64 %h, i8 %d)
-// CHECK-X64: = zext i64 %{{.*}} to i128
-// CHECK-X64: = shl nuw i128 %{{.*}}, 64
-// CHECK-X64: = zext i64 %{{.*}} to i128
-// CHECK-X64: = or i128 %
-// CHECK-X64: = and i8 %{{.*}}, 63
-// CHECK-X64: = shl i128 %
-// CHECK-X64: = lshr i128 %
-// CHECK-X64: = trunc i128 %
+// CHECK-X64: = zext i8 %{{.*}} to i64
+// CHECK-X64: = tail call i64 @llvm.fshl.i64(i64 %h, i64 %l, i64 %{{.*}})
 // CHECK-X64: ret i64 %
 
 unsigned __int64 test__shiftright128(unsigned __int64 l, unsigned __int64 h,
@@ -159,13 +153,8 @@
   return __shiftright128(l, h, d);
 }
 // CHECK-X64-LABEL: define dso_local i64 @test__shiftright128(i64 %l, i64 %h, i8 %d)
-// CHECK-X64: = zext i64 %{{.*}} to i128
-// CHECK-X64: = shl nuw i128 %{{.*}}, 64
-// CHECK-X64: = zext i64 %{{.*}} to i128
-// CHECK-X64: = or i128 %
-// CHECK-X64: = and i8 %{{.*}}, 63
-// CHECK-X64: = lshr i128 %
-// CHECK-X64: = trunc i128 %
+// CHECK-X64: = zext i8 %{{.*}} to i64
+// CHECK-X64: = tail call i64 @llvm.fshr.i64(i64 %h, i64 %l, i64 %{{.*}})
 // CHECK-X64: ret i64 %
 
 #endif // defined(__x86_64__)
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -13926,25 +13926,15 @@
   }
   case X86::BI__shiftleft128:
   case X86::BI__shiftright128: {
-    // FIXME: Once fshl/fshr no longer add an unneeded and and cmov, do this:
-    // llvm::Function *F = CGM.getIntrinsic(
-    //     BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
-    //     Int64Ty);
-    // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
-    // return Builder.CreateCall(F, Ops);
-    llvm::Type *Int128Ty = Builder.getInt128Ty();
-    Value *HighPart128 =
-        Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64);
-    Value *LowPart128 = Builder.CreateZExt(Ops[0], Int128Ty);
-    Value *Val = Builder.CreateOr(HighPart128, LowPart128);
-    Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty),
-                                   llvm::ConstantInt::get(Int128Ty, 0x3f));
-    Value *Res;
-    if (BuiltinID == X86::BI__shiftleft128)
-      Res = Builder.CreateLShr(Builder.CreateShl(Val, Amt), 64);
-    else
-      Res = Builder.CreateLShr(Val, Amt);
-    return Builder.CreateTrunc(Res, Int64Ty);
+    llvm::Function *F = CGM.getIntrinsic(
+        BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
+        Int64Ty);
+    // Flip low/high ops and zero-extend amount to matching type.
+    // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
+    // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
+    std::swap(Ops[0], Ops[1]);
+    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
+    return Builder.CreateCall(F, Ops);
   }
   case X86::BI_ReadWriteBarrier:
   case X86::BI_ReadBarrier:
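For context, the equivalence the patch relies on: __shiftleft128(l, h, d)
returns the high 64 bits of the 128-bit value h:l shifted left by d & 63,
which is exactly @llvm.fshl.i64(h, l, d), and __shiftright128 is the
@llvm.fshr.i64 analogue. Below is a minimal reference sketch of those
semantics in plain C, assuming a host compiler with unsigned __int128
support (e.g. x86-64 clang/gcc); the ref_* names are hypothetical and not
part of the patch.

  #include <stdint.h>
  #include <stdio.h>

  /* __shiftleft128(l, h, d): high 64 bits of ((h:l) << (d & 63)),
     i.e. the 64-bit funnel shift left fshl(h, l, d). */
  static uint64_t ref_shiftleft128(uint64_t l, uint64_t h, unsigned char d) {
    unsigned __int128 v = ((unsigned __int128)h << 64) | l;
    return (uint64_t)((v << (d & 63)) >> 64);
  }

  /* __shiftright128(l, h, d): low 64 bits of ((h:l) >> (d & 63)),
     i.e. the 64-bit funnel shift right fshr(h, l, d). */
  static uint64_t ref_shiftright128(uint64_t l, uint64_t h, unsigned char d) {
    unsigned __int128 v = ((unsigned __int128)h << 64) | l;
    return (uint64_t)(v >> (d & 63));
  }

  int main(void) {
    /* A left funnel shift by 8 pulls the top byte of the low half into
       the high half; a right funnel shift by 8 pushes the bottom byte
       of the high half into the low half. */
    printf("%llx\n", (unsigned long long)ref_shiftleft128(
                         0xF000000000000000ULL, 0xFFULL, 8)); /* fff0 */
    printf("%llx\n", (unsigned long long)ref_shiftright128(
                         0xF000000000000000ULL, 0xFFULL, 8)); /* fff0000000000000 */
    return 0;
  }

On x86-64, variable-count i64 fshl/fshr typically lower to a single
shld/shrd, matching the codegen improvement the godbolt link above
illustrates.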