================ @@ -139,21 +134,20 @@ define void @foo4() { ; PTX32-EMPTY: ; PTX32-NEXT: // %bb.0: ; PTX32-NEXT: mov.b32 %SPL, __local_depot3; -; PTX32-NEXT: cvta.local.u32 %SP, %SPL; -; PTX32-NEXT: add.u32 %r1, %SP, 0; -; PTX32-NEXT: add.u32 %r2, %SPL, 0; -; PTX32-NEXT: add.u32 %r3, %SP, 4; -; PTX32-NEXT: add.u32 %r4, %SPL, 4; -; PTX32-NEXT: st.local.b32 [%r2], 0; -; PTX32-NEXT: st.local.b32 [%r4], 0; +; PTX32-NEXT: add.u32 %r1, %SPL, 0; +; PTX32-NEXT: cvta.local.u32 %r2, %r1; +; PTX32-NEXT: add.u32 %r3, %SPL, 4; +; PTX32-NEXT: cvta.local.u32 %r4, %r3; +; PTX32-NEXT: st.local.b32 [%SPL], 0; +; PTX32-NEXT: st.local.b32 [%SPL+4], 0; ---------------- AlexMaclean wrote:
> I am wondering if a simple peephole optimization is missing.

Funny that you should mention peepholes. I think `NVPTXPeephole.cpp` may need to be significantly updated. Perhaps, instead of what it does now, it should identify all the `cvta` instructions generated from frame indices and centralize them into a single such instruction, as was the case prior to this change.

https://github.com/llvm/llvm-project/pull/154814
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits