https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/110815
>From 078f5f02502edadcc9c86f3e45f69e9fac918656 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 2 Oct 2024 11:20:23 +0400 Subject: [PATCH 1/3] DAG: Preserve more flags when expanding gep This allows selecting the addressing mode for stack instructions in cases where we need to prove the sign bit is zero. --- .../SelectionDAG/SelectionDAGBuilder.cpp | 41 +++++++++++++++---- .../CodeGen/AMDGPU/gep-flags-stack-offsets.ll | 6 +-- .../pointer-add-unknown-offset-debug-info.ll | 2 +- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 25213f587116d5..6838c0b530a363 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4386,6 +4386,17 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // it. IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); + SDNodeFlags ScaleFlags; + // The multiplication of an index by the type size does not wrap the + // pointer index type in a signed sense (mul nsw). + if (NW.hasNoUnsignedSignedWrap()) + ScaleFlags.setNoSignedWrap(true); + + // The multiplication of an index by the type size does not wrap the + // pointer index type in an unsigned sense (mul nuw). + if (NW.hasNoUnsignedWrap()) + ScaleFlags.setNoUnsignedWrap(true); + if (ElementScalable) { EVT VScaleTy = N.getValueType().getScalarType(); SDValue VScale = DAG.getNode( @@ -4393,27 +4404,41 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy)); if (IsVectorGEP) VScale = DAG.getSplatVector(N.getValueType(), dl, VScale); - IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale); + IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale, + ScaleFlags); } else { // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. if (ElementMul != 1) { if (ElementMul.isPowerOf2()) { unsigned Amt = ElementMul.logBase2(); - IdxN = DAG.getNode(ISD::SHL, dl, - N.getValueType(), IdxN, - DAG.getConstant(Amt, dl, IdxN.getValueType())); + IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN, + DAG.getConstant(Amt, dl, IdxN.getValueType()), + ScaleFlags); } else { SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl, IdxN.getValueType()); - IdxN = DAG.getNode(ISD::MUL, dl, - N.getValueType(), IdxN, Scale); + IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale, + ScaleFlags); } } } - N = DAG.getNode(ISD::ADD, dl, - N.getValueType(), N, IdxN); + SDNodeFlags AddFlags; + + // The successive addition of each offset (without adding the base + // address) does not wrap the pointer index type in a signed sense (add + // nsw). + if (NW.hasNoUnsignedSignedWrap()) + AddFlags.setNoSignedWrap(true); + + // The successive addition of each offset (without adding the base + // address) does not wrap the pointer index type in an unsigned sense (add + // nuw). + if (NW.hasNoUnsignedWrap()) + AddFlags.setNoUnsignedWrap(true); + + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN, AddFlags); } } diff --git a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll index 782894976c711c..a39afa6f609c7e 100644 --- a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll +++ b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll @@ -118,8 +118,7 @@ define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 { ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 -; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen +; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; @@ -145,8 +144,7 @@ define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 { ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 -; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen +; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll b/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll index 63d7391bd7d4f5..5fe5a90a973174 100644 --- a/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll +++ b/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll @@ -12,7 +12,7 @@ define void @pointer_add_unknown_offset(ptr %base, i32 %offset) !dbg !7 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:i64regs = COPY $i1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:i64regs = COPY $i0 ; CHECK-NEXT: [[SRAri:%[0-9]+]]:i64regs = SRAri [[COPY]], 0 - ; CHECK-NEXT: [[SLLXri:%[0-9]+]]:i64regs = SLLXri killed [[SRAri]], 2 + ; CHECK-NEXT: [[SLLXri:%[0-9]+]]:i64regs = nsw SLLXri killed [[SRAri]], 2 ; CHECK-NEXT: DBG_VALUE_LIST !13, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value), [[COPY1]], [[SLLXri]], debug-location !16 ; CHECK-NEXT: DBG_VALUE_LIST !14, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_plus_uconst, 3, DW_OP_stack_value), [[COPY1]], [[SLLXri]], debug-location !16 ; CHECK-NEXT: DBG_VALUE_LIST !15, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 2, DW_OP_plus, DW_OP_LLVM_arg, 1, DW_OP_LLVM_arg, 3, DW_OP_plus, DW_OP_plus, DW_OP_stack_value), [[COPY1]], [[COPY1]], [[SLLXri]], [[SLLXri]], debug-location !16 >From 792f3f6c208759c3b7cca428532f23d4477d4626 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 2 Oct 2024 14:13:31 +0400 Subject: [PATCH 2/3] Address comments This loses the benefit in the test --- .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 13 ++++--------- llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll | 6 ++++-- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 6838c0b530a363..771f219a3cbcc9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4426,15 +4426,10 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDNodeFlags AddFlags; - // The successive addition of each offset (without adding the base - // address) does not wrap the pointer index type in a signed sense (add - // nsw). - if (NW.hasNoUnsignedSignedWrap()) - AddFlags.setNoSignedWrap(true); - - // The successive addition of each offset (without adding the base - // address) does not wrap the pointer index type in an unsigned sense (add - // nuw). + // The successive addition of the current address, truncated to the + // pointer index type and interpreted as an unsigned number, and each + // offset, also interpreted as an unsigned number, does not wrap the + // pointer index type (add nuw). if (NW.hasNoUnsignedWrap()) AddFlags.setNoUnsignedWrap(true); diff --git a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll index a39afa6f609c7e..782894976c711c 100644 --- a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll +++ b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll @@ -118,7 +118,8 @@ define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 { ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 -; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; @@ -144,7 +145,8 @@ define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 { ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0 -; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; >From 449ff2d7b9cc566a89fa9809e2503ae28fbeda75 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 9 Oct 2024 09:52:27 +0400 Subject: [PATCH 3/3] Address comments --- .../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 771f219a3cbcc9..a981e9cc79289a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4389,13 +4389,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDNodeFlags ScaleFlags; // The multiplication of an index by the type size does not wrap the // pointer index type in a signed sense (mul nsw). - if (NW.hasNoUnsignedSignedWrap()) - ScaleFlags.setNoSignedWrap(true); + ScaleFlags.setNoSignedWrap(NW.hasNoUnsignedSignedWrap()); // The multiplication of an index by the type size does not wrap the // pointer index type in an unsigned sense (mul nuw). - if (NW.hasNoUnsignedWrap()) - ScaleFlags.setNoUnsignedWrap(true); + ScaleFlags.setNoUnsignedWrap(NW.hasNoUnsignedWrap()); if (ElementScalable) { EVT VScaleTy = N.getValueType().getScalarType(); @@ -4424,14 +4422,12 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } } - SDNodeFlags AddFlags; - // The successive addition of the current address, truncated to the // pointer index type and interpreted as an unsigned number, and each // offset, also interpreted as an unsigned number, does not wrap the // pointer index type (add nuw). - if (NW.hasNoUnsignedWrap()) - AddFlags.setNoUnsignedWrap(true); + SDNodeFlags AddFlags; + AddFlags.setNoUnsignedWrap(NW.hasNoUnsignedWrap()); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN, AddFlags); } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits