farzonl wrote: I also did some per-object-file testing of compile time, file size, and instruction count. I think there might be a win here. You reported that [SPIR was producing 30m instructions](https://github.com/llvm/llvm-project/pull/132252#issuecomment-2744454287), but I don't see anything close to that via objdump. I'm seeing 661 instructions. It's such a large disparity, though, that I have low confidence I'm measuring this the same way you are.
## File size ``` 1.6M TargetBuiltins/ARM.cpp.o 93K TargetBuiltins/AMDGPU.cpp.o 19K TargetBuiltins/Hexagon.cpp.o 67K TargetBuiltins/NVPTX.cpp.o 141K TargetBuiltins/PPC.cpp.o 1.1M TargetBuiltins/RISCV.cpp.o 3.3K TargetBuiltins/SPIR.cpp.o 29K TargetBuiltins/SystemZ.cpp.o 26K TargetBuiltins/WebAssembly.cpp.o 176K TargetBuiltins/X86.cpp.o ``` ## Compile time per obj file ``` time to compile TargetBuiltins.dir/ARM.cpp.o real 0m11.185s user 0m10.706s sys 0m0.478s time to compile TargetBuiltins.dir/AMDGPU.cpp.o real 0m7.228s user 0m6.776s sys 0m0.452s time to compile TargetBuiltins.dir/Hexagon.cpp.o real 0m6.979s user 0m6.551s sys 0m0.428s time to compile TargetBuiltins.dir/NVPTX.cpp.o real 0m7.037s user 0m6.616s sys 0m0.421s time to compile TargetBuiltins.dir/PPC.cpp.o real 0m7.650s user 0m7.196s sys 0m0.454s time to compile TargetBuiltins.dir/RISCV.cpp.o real 0m24.478s user 0m23.982s sys 0m0.492s time to compile TargetBuiltins.dir/SPIR.cpp.o real 0m6.809s user 0m6.373s sys 0m0.435s time to compile TargetBuiltins.dir/SystemZ.cpp.o real 0m7.002s user 0m6.555s sys 0m0.447s time to compile TargetBuiltins.dir/WebAssembly.cpp.o real 0m6.935s user 0m6.528s sys 0m0.407s time to compile TargetBuiltins.dir/X86.cpp.o real 0m7.535s user 0m7.122s sys 0m0.413s ``` ## SPIR obj dump results ```asm 000000000000000 <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE>: 0: 81 c6 40 fa ff ff add $0xfffffa40,%esi 6: 83 fe 03 cmp $0x3,%esi 9: 0f 87 84 02 00 00 ja 293 <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x293> f: 41 57 push %r15 11: 41 56 push %r14 13: 41 54 push %r12 15: 53 push %rbx 16: 48 83 ec 48 sub $0x48,%rsp 1a: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 21 <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x21> 21: 48 63 0c b0 movslq (%rax,%rsi,4),%rcx 25: 48 01 c1 add %rax,%rcx 28: ff e1 jmp *%rcx 2a: 8b 02 mov (%rdx),%eax 2c: 89 c1 mov %eax,%ecx 2e: c1 e9 18 shr $0x18,%ecx 31: 48 01 d1 add %rdx,%rcx 
34: c1 e8 10 shr $0x10,%eax 37: 83 e0 08 and $0x8,%eax 3a: 48 8b 74 08 08 mov 0x8(%rax,%rcx,1),%rsi 3f: 49 89 fe mov %rdi,%r14 42: 49 89 d7 mov %rdx,%r15 45: 31 d2 xor %edx,%edx 47: e8 00 00 00 00 call 4c <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x4c> 4c: 48 89 c3 mov %rax,%rbx 4f: 41 8b 07 mov (%r15),%eax 52: 89 c1 mov %eax,%ecx 54: c1 e9 18 shr $0x18,%ecx 57: 4c 01 f9 add %r15,%rcx 5a: c1 e8 10 shr $0x10,%eax 5d: 83 e0 08 and $0x8,%eax 60: 48 8b 74 08 10 mov 0x10(%rax,%rcx,1),%rsi 65: 4c 89 f7 mov %r14,%rdi 68: 31 d2 xor %edx,%edx 6a: e8 00 00 00 00 call 6f <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x6f> 6f: 49 81 c6 18 01 00 00 add $0x118,%r14 76: 48 8b 73 08 mov 0x8(%rbx),%rsi 7a: 0f b6 4e 08 movzbl 0x8(%rsi),%ecx 7e: 83 c1 ef add $0xffffffef,%ecx 81: 83 f9 01 cmp $0x1,%ecx 84: 77 07 ja 8d <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x8d> 86: 48 8b 4e 10 mov 0x10(%rsi),%rcx 8a: 48 8b 31 mov (%rcx),%rsi 8d: 48 89 5c 24 08 mov %rbx,0x8(%rsp) 92: 48 89 44 24 10 mov %rax,0x10(%rsp) 97: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 9e <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x9e> 9e: 48 89 44 24 20 mov %rax,0x20(%rsp) a3: 66 c7 44 24 40 03 01 movw $0x103,0x40(%rsp) aa: 48 8d 44 24 20 lea 0x20(%rsp),%rax af: 48 89 04 24 mov %rax,(%rsp) b3: 48 8d 4c 24 08 lea 0x8(%rsp),%rcx b8: 41 b8 02 00 00 00 mov $0x2,%r8d be: 4c 89 f7 mov %r14,%rdi c1: ba e9 2d 00 00 mov $0x2de9,%edx c6: e9 b4 01 00 00 jmp 27f <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x27f> cb: 8b 02 mov (%rdx),%eax cd: 89 c1 mov %eax,%ecx cf: c1 e9 18 shr $0x18,%ecx d2: 48 01 d1 add %rdx,%rcx d5: c1 e8 10 shr $0x10,%eax d8: 83 e0 08 and $0x8,%eax db: 48 8b 74 08 08 mov 0x8(%rax,%rcx,1),%rsi e0: 48 89 fb mov %rdi,%rbx e3: 49 89 d7 mov %rdx,%r15 e6: 31 d2 xor %edx,%edx e8: e8 00 00 00 00 call ed 
<_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0xed> ed: 49 89 c6 mov %rax,%r14 f0: 41 8b 07 mov (%r15),%eax f3: 89 c1 mov %eax,%ecx f5: c1 e9 18 shr $0x18,%ecx f8: 4c 01 f9 add %r15,%rcx fb: c1 e8 10 shr $0x10,%eax fe: 83 e0 08 and $0x8,%eax 101: 48 8b 74 08 10 mov 0x10(%rax,%rcx,1),%rsi 106: 48 89 df mov %rbx,%rdi 109: 31 d2 xor %edx,%edx 10b: e8 00 00 00 00 call 110 <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x110> 110: 48 81 c3 18 01 00 00 add $0x118,%rbx 117: 49 8b 76 08 mov 0x8(%r14),%rsi 11b: 4c 89 74 24 08 mov %r14,0x8(%rsp) 120: 48 89 44 24 10 mov %rax,0x10(%rsp) 125: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 12c <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x12c> 12c: 48 89 44 24 20 mov %rax,0x20(%rsp) 131: 66 c7 44 24 40 03 01 movw $0x103,0x40(%rsp) 138: 48 8d 44 24 20 lea 0x20(%rsp),%rax 13d: 48 89 04 24 mov %rax,(%rsp) 141: 48 8d 4c 24 08 lea 0x8(%rsp),%rcx 146: 41 b8 02 00 00 00 mov $0x2,%r8d 14c: 48 89 df mov %rbx,%rdi 14f: ba 06 2e 00 00 mov $0x2e06,%edx 154: e9 26 01 00 00 jmp 27f <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x27f> 159: 8b 02 mov (%rdx),%eax 15b: 89 c1 mov %eax,%ecx 15d: c1 e9 18 shr $0x18,%ecx 160: 48 01 d1 add %rdx,%rcx 163: c1 e8 10 shr $0x10,%eax 166: 83 e0 08 and $0x8,%eax 169: 48 8b 74 08 08 mov 0x8(%rax,%rcx,1),%rsi 16e: 48 89 fb mov %rdi,%rbx 171: 49 89 d4 mov %rdx,%r12 174: 31 d2 xor %edx,%edx 176: e8 00 00 00 00 call 17b <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x17b> 17b: 49 89 c6 mov %rax,%r14 17e: 41 8b 04 24 mov (%r12),%eax 182: 89 c1 mov %eax,%ecx 184: c1 e9 18 shr $0x18,%ecx 187: 4c 01 e1 add %r12,%rcx 18a: c1 e8 10 shr $0x10,%eax 18d: 83 e0 08 and $0x8,%eax 190: 48 8b 74 08 10 mov 0x10(%rax,%rcx,1),%rsi 195: 48 89 df mov %rbx,%rdi 198: 31 d2 xor %edx,%edx 19a: e8 00 00 00 00 call 19f <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x19f> 
19f: 49 89 c7 mov %rax,%r15 1a2: 41 8b 04 24 mov (%r12),%eax 1a6: 89 c1 mov %eax,%ecx 1a8: c1 e9 18 shr $0x18,%ecx 1ab: 4c 01 e1 add %r12,%rcx 1ae: c1 e8 10 shr $0x10,%eax 1b1: 83 e0 08 and $0x8,%eax 1b4: 48 8b 74 08 18 mov 0x18(%rax,%rcx,1),%rsi 1b9: 48 89 df mov %rbx,%rdi 1bc: 31 d2 xor %edx,%edx 1be: e8 00 00 00 00 call 1c3 <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x1c3> 1c3: 48 81 c3 18 01 00 00 add $0x118,%rbx 1ca: 49 8b 76 08 mov 0x8(%r14),%rsi 1ce: 4c 89 74 24 08 mov %r14,0x8(%rsp) 1d3: 4c 89 7c 24 10 mov %r15,0x10(%rsp) 1d8: 48 89 44 24 18 mov %rax,0x18(%rsp) 1dd: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 1e4 <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x1e4> 1e4: 48 89 44 24 20 mov %rax,0x20(%rsp) 1e9: 66 c7 44 24 40 03 01 movw $0x103,0x40(%rsp) 1f0: 48 8d 44 24 20 lea 0x20(%rsp),%rax 1f5: 48 89 04 24 mov %rax,(%rsp) 1f9: 48 8d 4c 24 08 lea 0x8(%rsp),%rcx 1fe: 41 b8 03 00 00 00 mov $0x3,%r8d 204: 48 89 df mov %rbx,%rdi 207: ba 12 2e 00 00 mov $0x2e12,%edx 20c: eb 71 jmp 27f <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x27f> 20e: 8b 02 mov (%rdx),%eax 210: 89 c1 mov %eax,%ecx 212: c1 e9 18 shr $0x18,%ecx 215: 48 01 d1 add %rdx,%rcx 218: c1 e8 10 shr $0x10,%eax 21b: 83 e0 08 and $0x8,%eax 21e: 48 8b 74 08 08 mov 0x8(%rax,%rcx,1),%rsi 223: 48 89 fb mov %rdi,%rbx 226: 31 d2 xor %edx,%edx 228: e8 00 00 00 00 call 22d <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x22d> 22d: 48 81 c3 18 01 00 00 add $0x118,%rbx 234: 48 8b 70 08 mov 0x8(%rax),%rsi 238: 0f b6 4e 08 movzbl 0x8(%rsi),%ecx 23c: 83 c1 ef add $0xffffffef,%ecx 23f: 83 f9 01 cmp $0x1,%ecx 242: 77 07 ja 24b <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x24b> 244: 48 8b 4e 10 mov 0x10(%rsi),%rcx 248: 48 8b 31 mov (%rcx),%rsi 24b: 48 89 44 24 08 mov %rax,0x8(%rsp) 250: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 257 
<_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x257> 257: 48 89 44 24 20 mov %rax,0x20(%rsp) 25c: 66 c7 44 24 40 03 01 movw $0x103,0x40(%rsp) 263: 48 8d 44 24 20 lea 0x20(%rsp),%rax 268: 48 89 04 24 mov %rax,(%rsp) 26c: 48 8d 4c 24 08 lea 0x8(%rsp),%rcx 271: 41 b8 01 00 00 00 mov $0x1,%r8d 277: 48 89 df mov %rbx,%rdi 27a: ba fc 2d 00 00 mov $0x2dfc,%edx 27f: 45 31 c9 xor %r9d,%r9d 282: e8 00 00 00 00 call 287 <_ZN5clang7CodeGen15CodeGenFunction20EmitSPIRVBuiltinExprEjPKNS_8CallExprE+0x287> 287: 48 83 c4 48 add $0x48,%rsp 28b: 5b pop %rbx 28c: 41 5c pop %r12 28e: 41 5e pop %r14 290: 41 5f pop %r15 292: c3 ret 293: 31 c0 xor %eax,%eax 295: c3 ret ``` https://github.com/llvm/llvm-project/pull/133619 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits