================
@@ -139,21 +134,20 @@ define void @foo4() {
 ; PTX32-EMPTY:
 ; PTX32-NEXT:  // %bb.0:
 ; PTX32-NEXT:    mov.b32 %SPL, __local_depot3;
-; PTX32-NEXT:    cvta.local.u32 %SP, %SPL;
-; PTX32-NEXT:    add.u32 %r1, %SP, 0;
-; PTX32-NEXT:    add.u32 %r2, %SPL, 0;
-; PTX32-NEXT:    add.u32 %r3, %SP, 4;
-; PTX32-NEXT:    add.u32 %r4, %SPL, 4;
-; PTX32-NEXT:    st.local.b32 [%r2], 0;
-; PTX32-NEXT:    st.local.b32 [%r4], 0;
+; PTX32-NEXT:    add.u32 %r1, %SPL, 0;
+; PTX32-NEXT:    cvta.local.u32 %r2, %r1;
+; PTX32-NEXT:    add.u32 %r3, %SPL, 4;
+; PTX32-NEXT:    cvta.local.u32 %r4, %r3;
+; PTX32-NEXT:    st.local.b32 [%SPL], 0;
+; PTX32-NEXT:    st.local.b32 [%SPL+4], 0;
----------------
AlexMaclean wrote:

> I am wondering if a simple peephole optimization is missing.

Funny that you should mention peepholes. I think `NVPTXPeephole.cpp` may need
to be significantly updated. Perhaps, instead of what it does now, it should
identify all the `cvta` instructions derived from frame indices and consolidate
them into a single such instruction, as we would have had prior to this change.

https://github.com/llvm/llvm-project/pull/154814
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to