john-brawn-arm wrote:
I'm also seeing this cause global merging to no longer occur.
tmp.c
```
int var1 = 0;
int var2 = 1;
int var3 = 2;
__attribute__((noinline)) void fn() {
var1 += var2 + var3;
}
int main(int argc, char **argv) {
var1 = *argv[0];
var2 = *argv[1];
var3 = *argv[2];
fn();
return var1;
}
```
Compiled with ``clang --target=arm-none-eabi -mcpu=cortex-m55 -mfloat-abi=hard -O3 tmp.c -flto -c``, the IR is:
```
; ModuleID = 'new.o'
source_filename = "tmp.c"
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-unknown-none-eabihf"
@var1 = dso_local local_unnamed_addr global i32 0, align 4, !guid !0
@var2 = dso_local local_unnamed_addr global i32 1, align 4, !guid !1
@var3 = dso_local local_unnamed_addr global i32 2, align 4, !guid !2
; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, argmem: none, inaccessiblemem: none, target_mem: none)
define dso_local void @fn() local_unnamed_addr #0 !guid !13 {
%1 = load i32, ptr @var2, align 4, !tbaa !9
%2 = load i32, ptr @var3, align 4, !tbaa !9
%3 = add nsw i32 %2, %1
%4 = load i32, ptr @var1, align 4, !tbaa !9
%5 = add nsw i32 %3, %4
store i32 %5, ptr @var1, align 4, !tbaa !9
ret void
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, inaccessiblemem: none, target_mem: none)
define dso_local i32 @main(i32 noundef %0, ptr noundef readonly captures(none) %1) local_unnamed_addr #1 !guid !14 {
%3 = load ptr, ptr %1, align 4, !tbaa !15
%4 = load i8, ptr %3, align 1, !tbaa !18
%5 = zext i8 %4 to i32
store i32 %5, ptr @var1, align 4, !tbaa !9
%6 = getelementptr inbounds nuw i8, ptr %1, i32 4
%7 = load ptr, ptr %6, align 4, !tbaa !15
%8 = load i8, ptr %7, align 1, !tbaa !18
%9 = zext i8 %8 to i32
store i32 %9, ptr @var2, align 4, !tbaa !9
%10 = getelementptr inbounds nuw i8, ptr %1, i32 8
%11 = load ptr, ptr %10, align 4, !tbaa !15
%12 = load i8, ptr %11, align 1, !tbaa !18
%13 = zext i8 %12 to i32
store i32 %13, ptr @var3, align 4, !tbaa !9
tail call void @fn()
%14 = load i32, ptr @var1, align 4, !tbaa !9
ret i32 %14
}
attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, argmem: none, inaccessiblemem: none, target_mem: none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-pacbti,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" }
attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, inaccessiblemem: none, target_mem: none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-pacbti,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" }
!llvm.module.flags = !{!3, !4, !5, !6, !7}
!llvm.ident = !{!8}
!llvm.errno.tbaa = !{!9}
!0 = !{i64 -7857281302475341212}
!1 = !{i64 -3534809055184556418}
!2 = !{i64 6020659679014300478}
!3 = !{i32 1, !"min_enum_size", i32 4}
!4 = !{i32 4, !"arm-eabi-fp-denormal", i32 1}
!5 = !{i32 8, !"arm-eabi-fp-number-model", i32 3}
!6 = !{i32 1, !"ThinLTO", i32 0}
!7 = !{i32 1, !"EnableSplitLTOUnit", i32 1}
!8 = !{!"clang version 23.0.0git"}
!9 = !{!10, !10, i64 0}
!10 = !{!"int", !11, i64 0}
!11 = !{!"omnipotent char", !12, i64 0}
!12 = !{!"Simple C/C++ TBAA"}
!13 = !{i64 6064007760896105805}
!14 = !{i64 -2624081020897602054}
!15 = !{!16, !16, i64 0}
!16 = !{!"p1 omnipotent char", !17, i64 0}
!17 = !{!"any pointer", !11, i64 0}
!18 = !{!11, !11, i64 0}
```
Running ``llvm-lto tmp.o -filetype=asm -o tmp.s -exported-symbol main -O3``, the output for fn is:
```
fn:
.fnstart
movw r0, :lower16:var2
movw r1, :lower16:var3
movt r0, :upper16:var2
movw r2, :lower16:var1
movt r1, :upper16:var3
ldr r0, [r0]
ldr r1, [r1]
movt r2, :upper16:var1
ldr r3, [r2]
add r0, r1
add r0, r3
str r0, [r2]
bx lr
.Lfunc_end0:
.size fn, .Lfunc_end0-fn
.cantunwind
.fnend
```
Without the guid metadata, the output for fn is:
```
fn:
.fnstart
movw r0, :lower16:.L_MergedGlobals
movw r2, :lower16:var1
movt r0, :upper16:.L_MergedGlobals
ldrd r0, r1, [r0]
movt r2, :upper16:var1
ldr r3, [r2]
add r0, r1
add r0, r3
str r0, [r2]
bx lr
.Lfunc_end0:
.size fn, .Lfunc_end0-fn
.cantunwind
.fnend
```
https://github.com/llvm/llvm-project/pull/184065
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits