https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116102
Bug ID: 116102 Summary: static_cast to derived& of base& produced unused gimple ir and affected the correctness of -fsanitize=vptr Product: gcc Version: 12.3.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c++ Assignee: unassigned at gcc dot gnu.org Reporter: wso4133560 at gmail dot com Target Milestone: --- Environment: x86-64 ubuntu22.04 gcc-12.3.0 Test code: [code] #include <iostream> struct Base { Base() {} virtual ~Base() {} virtual void func() { std::cout << "Base: " << __FUNCTION__ << "\n"; } }; struct Derived : Base {}; int main(int argc, const char *argv[]) { Base b; (void)(argc); (void)(argv); // this is undefined behavior, but will generally work Derived &d = static_cast<Derived &>(b); d.func(); return 0; } [/code] Compile command: g++ -fsanitize=vptr strict-cast.cpp -o strict-cast. With LLVM, if you compile strict-cast.cpp with -fsanitize=vptr and run strict-cast, you get an error report at the line "Derived &d = static_cast<Derived &>(b);": " cfi_cast_strict.cpp:22:18: runtime error: downcast of address 0x7ffc75e30510 which does not point to an object of type 'Derived' 0x7ffc75e30510: note: object is of type 'Base' 00 00 00 00 d8 ac df 65 31 5c 00 00 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 90 9d 16 26 ^~~~~~~~~~~~~~~~~~~~~~~ vptr for 'Base' SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior cfi_cast_strict.cpp:22:18 " But with GCC, if you compile strict-cast.cpp with -fsanitize=vptr and run strict-cast, it only prints "Base: func". There is no error and no abort report. I modified the GCC code to print the detailed GIMPLE IR, and also obtained the LLVM IR. By comparing them, we can find out why GCC and LLVM behave differently. This is the GCC GIMPLE IR for main. 
[code] main:***************************** <bb 2> : Base::Base (&b); d_12 = &b; _1 = &d_12->D.46085; _13 = &d_12->D.46085; _2 = _13->_vptr.Base; _3 = (long unsigned int) _2; _22 = _3 ^ 13106270311721450772; _23 = _22 * 11376068507788127593; _24 = _23 << 47; _25 = _3 ^ _23; _26 = _24 ^ _25; _27 = _26 * 11376068507788127593; _28 = _27 << 47; _29 = _27 ^ _28; _30 = _29 * 11376068507788127593; _31 = _30 & 127; _32 = __ubsan_vptr_type_cache[_31]; if (_32 != _30) goto <bb 8>; [0.05%] else goto <bb 7>; [99.95%] <bb 8> : __builtin___ubsan_handle_dynamic_type_cache_miss (&*.Lubsan_data0, _13, _30); <bb 7> : _4 = func; _5 = &d_12->D.46085; OBJ_TYPE_REF(_4;(struct Base)_1->2B) (_5); <bb 3> : _16 = 0; _6 = b._vptr.Base; _7 = (long unsigned int) _6; _33 = _7 ^ 13106270311721450772; _34 = _33 * 11376068507788127593; _35 = _34 << 47; _36 = _7 ^ _34; _37 = _35 ^ _36; _38 = _37 * 11376068507788127593; _39 = _38 << 47; _40 = _38 ^ _39; _41 = _40 * 11376068507788127593; _42 = _41 & 127; _43 = __ubsan_vptr_type_cache[_42]; if (_43 != _41) goto <bb 10>; [0.05%] else goto <bb 9>; [99.95%] <bb 10> : __builtin___ubsan_handle_dynamic_type_cache_miss (&*.Lubsan_data1, &b, _41); <bb 9> : Base::~Base (&b); b ={v} {CLOBBER(eol)}; <bb 4> : <L2>: return _16; <bb 5> : <L3>: _8 = b._vptr.Base; _9 = (long unsigned int) _8; _44 = _9 ^ 13106270311721450772; _45 = _44 * 11376068507788127593; _46 = _45 << 47; _47 = _9 ^ _45; _48 = _46 ^ _47; _49 = _48 * 11376068507788127593; _50 = _49 << 47; _51 = _49 ^ _50; _52 = _51 * 11376068507788127593; _53 = _52 & 127; _54 = __ubsan_vptr_type_cache[_53]; if (_54 != _52) goto <bb 12>; [0.05%] else goto <bb 11>; [99.95%] <bb 12> : __builtin___ubsan_handle_dynamic_type_cache_miss (&*.Lubsan_data2, &b, _52); <bb 11> : Base::~Base (&b); resx 2 <bb 6> : <L4>: resx 1 [/code] This is main LLVM IR. 
[code] ; Function Attrs: mustprogress noinline norecurse optnone uwtable define dso_local noundef i32 @main(i32 noundef %0, i8** noundef %1) #4 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { %3 = alloca i32, align 4 %4 = alloca i32, align 4 %5 = alloca i8**, align 8 %6 = alloca %struct.Base, align 8 %7 = alloca %struct.Derived*, align 8 %8 = alloca i8*, align 8 %9 = alloca i32, align 4 store i32 0, i32* %3, align 4 store i32 %0, i32* %4, align 4 store i8** %1, i8*** %5, align 8 call void @_ZN4BaseC2Ev(%struct.Base* noundef nonnull align 8 dereferenceable(8) %6) %10 = bitcast %struct.Base* %6 to %struct.Derived* %11 = bitcast %struct.Derived* %10 to i64*, !nosanitize !8 %12 = load i64, i64* %11, align 8, !nosanitize !8 %13 = xor i64 6502632529789636472, %12, !nosanitize !8 %14 = mul i64 %13, -7070675565921424023, !nosanitize !8 %15 = lshr i64 %14, 47, !nosanitize !8 %16 = xor i64 %15, %14, !nosanitize !8 %17 = xor i64 %12, %16, !nosanitize !8 %18 = mul i64 %17, -7070675565921424023, !nosanitize !8 %19 = lshr i64 %18, 47, !nosanitize !8 %20 = xor i64 %19, %18, !nosanitize !8 %21 = mul i64 %20, -7070675565921424023, !nosanitize !8 %22 = and i64 %21, 127, !nosanitize !8 %23 = getelementptr inbounds [128 x i64], [128 x i64]* @__ubsan_vptr_type_cache, i32 0, i64 %22, !nosanitize !8 %24 = load i64, i64* %23, align 8, !nosanitize !8 %25 = icmp eq i64 %24, %21, !nosanitize !8 br i1 %25, label %28, label %26, !prof !9, !nosanitize !8 26: ; preds = %2 %27 = ptrtoint %struct.Derived* %10 to i64, !nosanitize !8 call void @__ubsan_handle_dynamic_type_cache_miss(i8* bitcast ({ { [22 x i8]*, i32, i32 }, { i16, i16, [10 x i8] }*, i8*, i8 }* @anon.80aadc1508201d57d5f785dcad7efd1f.1 to i8*), i64 %27, i64 %21) #3, !nosanitize !8 br label %28, !nosanitize !8 28: ; preds = %26, %2 store %struct.Derived* %10, %struct.Derived** %7, align 8 %29 = load %struct.Derived*, %struct.Derived** %7, align 8 %30 = bitcast %struct.Derived* %29 to %struct.Base* %31 = icmp ne 
%struct.Base* %30, null, !nosanitize !8 br i1 %31, label %32, label %51, !nosanitize !8 32: ; preds = %28 %33 = bitcast %struct.Base* %30 to i64*, !nosanitize !8 %34 = load i64, i64* %33, align 8, !nosanitize !8 %35 = xor i64 -5441721041627791282, %34, !nosanitize !8 %36 = mul i64 %35, -7070675565921424023, !nosanitize !8 %37 = lshr i64 %36, 47, !nosanitize !8 %38 = xor i64 %37, %36, !nosanitize !8 %39 = xor i64 %34, %38, !nosanitize !8 %40 = mul i64 %39, -7070675565921424023, !nosanitize !8 %41 = lshr i64 %40, 47, !nosanitize !8 %42 = xor i64 %41, %40, !nosanitize !8 %43 = mul i64 %42, -7070675565921424023, !nosanitize !8 %44 = and i64 %43, 127, !nosanitize !8 %45 = getelementptr inbounds [128 x i64], [128 x i64]* @__ubsan_vptr_type_cache, i32 0, i64 %44, !nosanitize !8 %46 = load i64, i64* %45, align 8, !nosanitize !8 %47 = icmp eq i64 %46, %43, !nosanitize !8 br i1 %47, label %50, label %48, !prof !9, !nosanitize !8 48: ; preds = %32 %49 = ptrtoint %struct.Base* %30 to i64, !nosanitize !8 call void @__ubsan_handle_dynamic_type_cache_miss(i8* bitcast ({ { [22 x i8]*, i32, i32 }, { i16, i16, [7 x i8] }*, i8*, i8 }* @anon.80aadc1508201d57d5f785dcad7efd1f.3 to i8*), i64 %49, i64 %43) #3, !nosanitize !8 br label %50, !nosanitize !8 50: ; preds = %48, %32 br label %51, !nosanitize !8 51: ; preds = %50, %28 %52 = bitcast %struct.Base* %30 to void (%struct.Base*)*** %53 = load void (%struct.Base*)**, void (%struct.Base*)*** %52, align 8 %54 = getelementptr inbounds void (%struct.Base*)*, void (%struct.Base*)** %53, i64 2 %55 = load void (%struct.Base*)*, void (%struct.Base*)** %54, align 8 invoke void %55(%struct.Base* noundef nonnull align 8 dereferenceable(8) %30) to label %56 unwind label %58 56: ; preds = %51 store i32 0, i32* %3, align 4 call void @_ZN4BaseD2Ev(%struct.Base* noundef nonnull align 8 dereferenceable(8) %6) #3 %57 = load i32, i32* %3, align 4 ret i32 %57 58: ; preds = %51 %59 = landingpad { i8*, i32 } cleanup %60 = extractvalue { i8*, i32 } %59, 0 
store i8* %60, i8** %8, align 8 %61 = extractvalue { i8*, i32 } %59, 1 store i32 %61, i32* %9, align 4 call void @_ZN4BaseD2Ev(%struct.Base* noundef nonnull align 8 dereferenceable(8) %6) #3 br label %62 62: ; preds = %58 %63 = load i8*, i8** %8, align 8 %64 = load i32, i32* %9, align 4 %65 = insertvalue { i8*, i32 } undef, i8* %63, 0 %66 = insertvalue { i8*, i32 } %65, i32 %64, 1 resume { i8*, i32 } %66 } [/code] Comparing the two, we can see that after the Base constructor call, LLVM uses "%10 = bitcast %struct.Base* %6 to %struct.Derived* %11 = bitcast %struct.Derived* %10 to i64*, !nosanitize !8" to convert the Base* to a Derived*, whereas GCC only produces an unused GIMPLE statement "_1 = &d_12->D.46085;". GCC and LLVM both perform the check according to the function's type; since GCC has no Base* to Derived* cast operation, the check for the line "Derived &d = static_cast<Derived &>(b);" always passes in GCC. If I change "Derived &d = static_cast<Derived &>(b);" to "Derived *d = static_cast<Derived *>(&b);", the check works well. Would you mind checking this problem for me? Thanks.