vsk updated this revision to Diff 101479. vsk marked 3 inline comments as done. vsk added a comment.
Thanks for the review comments. I've changed the calls which use Builder as suggested, and fixed up the tests. It sounds like this patch is in good shape, so I'll commit this after two days provided that there's no blocking feedback. https://reviews.llvm.org/D33910 Files: lib/CodeGen/CGExpr.cpp lib/CodeGen/CGExprScalar.cpp lib/CodeGen/CodeGenFunction.h test/CodeGen/ubsan-pointer-overflow.m
Index: test/CodeGen/ubsan-pointer-overflow.m =================================================================== --- test/CodeGen/ubsan-pointer-overflow.m +++ test/CodeGen/ubsan-pointer-overflow.m @@ -5,17 +5,13 @@ // CHECK: [[BASE:%.*]] = ptrtoint i8* {{.*}} to i64, !nosanitize // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], 1, !nosanitize // CHECK-NEXT: [[POSVALID:%.*]] = icmp uge i64 [[COMPGEP]], [[BASE]], !nosanitize - // CHECK-NEXT: [[NEGVALID:%.*]] = icmp ult i64 [[COMPGEP]], [[BASE]], !nosanitize - // CHECK-NEXT: [[DIFFVALID:%.*]] = select i1 true, i1 [[POSVALID]], i1 [[NEGVALID]], !nosanitize - // CHECK-NEXT: [[VALID:%.*]] = and i1 true, [[DIFFVALID]], !nosanitize - // CHECK-NEXT: br i1 [[VALID]]{{.*}}, !nosanitize + // CHECK-NEXT: br i1 [[POSVALID]]{{.*}}, !nosanitize // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}, i64 [[BASE]], i64 [[COMPGEP]]){{.*}}, !nosanitize ++p; // CHECK: ptrtoint i8* {{.*}} to i64, !nosanitize // CHECK-NEXT: add i64 {{.*}}, -1, !nosanitize // CHECK: select i1 false{{.*}}, !nosanitize - // CHECK-NEXT: and i1 true{{.*}}, !nosanitize // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} --p; @@ -30,16 +26,35 @@ void binary_arith(char *p, int i) { // CHECK: [[SMUL:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %{{.*}}), !nosanitize // CHECK-NEXT: [[SMULOFLOW:%.*]] = extractvalue { i64, i1 } [[SMUL]], 1, !nosanitize - // CHECK-NEXT: [[OFFSETOFLOW:%.*]] = or i1 false, [[SMULOFLOW]], !nosanitize // CHECK-NEXT: [[SMULVAL:%.*]] = extractvalue { i64, i1 } [[SMUL]], 0, !nosanitize // CHECK-NEXT: [[BASE:%.*]] = ptrtoint i8* {{.*}} to i64, !nosanitize // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], [[SMULVAL]], !nosanitize + // CHECK-NEXT: [[OFFSETVALID:%.*]] = xor i1 [[SMULOFLOW]], true, !nosanitize // CHECK-NEXT: [[POSVALID:%.*]] = icmp uge i64 [[COMPGEP]], [[BASE]], !nosanitize - // CHECK-NEXT: [[NEGVALID:%.*]] = icmp ult i64 [[COMPGEP]], [[BASE]], !nosanitize // CHECK-NEXT: [[POSOFFSET:%.*]] = icmp sge i64 [[SMULVAL]], 0, !nosanitize - // CHECK-DAG: [[OFFSETVALID:%.*]] = xor i1 [[OFFSETOFLOW]], true, !nosanitize - // CHECK-DAG: [[DIFFVALID:%.*]] = select i1 [[POSOFFSET]], i1 [[POSVALID]], i1 [[NEGVALID]], !nosanitize - // CHECK: [[VALID:%.*]] = and i1 [[OFFSETVALID]], [[DIFFVALID]], !nosanitize + // CHECK-NEXT: [[NEGVALID:%.*]] = icmp ult i64 [[COMPGEP]], [[BASE]], !nosanitize + // CHECK-NEXT: [[DIFFVALID:%.*]] = select i1 [[POSOFFSET]], i1 [[POSVALID]], i1 [[NEGVALID]], !nosanitize + // CHECK: [[VALID:%.*]] = and i1 [[DIFFVALID]], [[OFFSETVALID]], !nosanitize + // CHECK-NEXT: br i1 [[VALID]]{{.*}}, !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}, i64 [[BASE]], i64 [[COMPGEP]]){{.*}}, !nosanitize + p + i; + + // CHECK: [[OFFSET:%.*]] = sub i64 0, {{.*}} + // CHECK-NEXT: getelementptr inbounds {{.*}} [[OFFSET]] + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + p - i; +} + +// CHECK-LABEL: define void @binary_arith_unsigned +void binary_arith_unsigned(char *p, unsigned i) { + // CHECK: [[SMUL:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %{{.*}}), !nosanitize + // CHECK-NEXT: [[SMULOFLOW:%.*]] = extractvalue { i64, i1 } [[SMUL]], 1, !nosanitize + // CHECK-NEXT: [[SMULVAL:%.*]] = extractvalue { i64, i1 } [[SMUL]], 0, !nosanitize + // CHECK-NEXT: [[BASE:%.*]] = ptrtoint i8* {{.*}} to i64, !nosanitize + // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], [[SMULVAL]], !nosanitize + // CHECK-NEXT: [[OFFSETVALID:%.*]] = xor i1 [[SMULOFLOW]], true, !nosanitize + // CHECK-NEXT: [[POSVALID:%.*]] = icmp uge i64 [[COMPGEP]], [[BASE]], !nosanitize + // CHECK: [[VALID:%.*]] = and i1 [[POSVALID]], [[OFFSETVALID]], !nosanitize // CHECK-NEXT: br i1 [[VALID]]{{.*}}, !nosanitize // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}, i64 [[BASE]], i64 [[COMPGEP]]){{.*}}, !nosanitize p + i; @@ -55,16 +70,15 @@ // CHECK: getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARR:%.*]], i64 0, i64 [[IDXPROM:%.*]] // CHECK-NEXT: [[SMUL:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 40, i64 [[IDXPROM]]), !nosanitize // CHECK-NEXT: [[SMULOFLOW:%.*]] = extractvalue { i64, i1 } [[SMUL]], 1, !nosanitize - // CHECK-NEXT: [[OFFSETOFLOW:%.*]] = or i1 false, [[SMULOFLOW]], !nosanitize // CHECK-NEXT: [[SMULVAL:%.*]] = extractvalue { i64, i1 } [[SMUL]], 0, !nosanitize // CHECK-NEXT: [[BASE:%.*]] = ptrtoint [10 x [10 x i32]]* [[ARR]] to i64, !nosanitize // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], [[SMULVAL]], !nosanitize + // CHECK-NEXT: [[OFFSETVALID:%.*]] = xor i1 [[SMULOFLOW]], true, !nosanitize // CHECK-NEXT: [[POSVALID:%.*]] = icmp uge i64 [[COMPGEP]], [[BASE]], !nosanitize - // CHECK-NEXT: [[NEGVALID:%.*]] = icmp ult i64 [[COMPGEP]], [[BASE]], !nosanitize // CHECK-NEXT: [[POSOFFSET:%.*]] = icmp sge i64 [[SMULVAL]], 0, !nosanitize - // CHECK-DAG: [[OFFSETVALID:%.*]] = xor i1 [[OFFSETOFLOW]], true, !nosanitize - // CHECK-DAG: [[DIFFVALID:%.*]] = select i1 [[POSOFFSET]], i1 [[POSVALID]], i1 [[NEGVALID]], !nosanitize - // CHECK: [[VALID:%.*]] = and i1 [[OFFSETVALID]], [[DIFFVALID]], !nosanitize + // CHECK-NEXT: [[NEGVALID:%.*]] = icmp ult i64 [[COMPGEP]], [[BASE]], !nosanitize + // CHECK-NEXT: [[DIFFVALID:%.*]] = select i1 [[POSOFFSET]], i1 [[POSVALID]], i1 [[NEGVALID]], !nosanitize + // CHECK: [[VALID:%.*]] = and i1 [[DIFFVALID]], [[OFFSETVALID]], !nosanitize // CHECK-NEXT: br i1 [[VALID]]{{.*}}, !nosanitize // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}, i64 [[BASE]], i64 [[COMPGEP]]){{.*}}, !nosanitize @@ -101,6 +115,24 @@ arr[k][k]; } +// CHECK-LABEL: define void @pointer_array_unsigned_indices +void pointer_array_unsigned_indices(int **arr, unsigned k) { + // CHECK-NOT: select + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + // CHECK-NOT: select + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + arr[k][k]; +} + +// CHECK-LABEL: define void @pointer_array_mixed_indices +void pointer_array_mixed_indices(int **arr, int i, unsigned j) { + // CHECK: select + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + // CHECK-NOT: select + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + arr[i][j]; +} + struct S1 { int pad1; union { Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -3554,8 +3554,10 @@ /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to /// detect undefined behavior when the pointer overflow sanitizer is enabled. + /// \p SignedIndices indicates whether any of the GEP indices are signed. llvm::Value *EmitCheckedInBoundsGEP(llvm::Value *Ptr, ArrayRef<llvm::Value *> IdxList, + bool SignedIndices, SourceLocation Loc, const Twine &Name = ""); Index: lib/CodeGen/CGExprScalar.cpp =================================================================== --- lib/CodeGen/CGExprScalar.cpp +++ lib/CodeGen/CGExprScalar.cpp @@ -1851,6 +1851,7 @@ llvm::Value *input; int amount = (isInc ? 1 : -1); + bool signedIndex = !isInc; if (const AtomicType *atomicTy = type->getAs<AtomicType>()) { type = atomicTy->getValueType(); @@ -1940,8 +1941,8 @@ if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, numElts, "vla.inc"); else - value = CGF.EmitCheckedInBoundsGEP(value, numElts, E->getExprLoc(), - "vla.inc"); + value = CGF.EmitCheckedInBoundsGEP(value, numElts, signedIndex, + E->getExprLoc(), "vla.inc"); // Arithmetic on function pointers (!) is just +-1. } else if (type->isFunctionType()) { @@ -1951,18 +1952,18 @@ if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.funcptr"); else - value = CGF.EmitCheckedInBoundsGEP(value, amt, E->getExprLoc(), - "incdec.funcptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, signedIndex, + E->getExprLoc(), "incdec.funcptr"); value = Builder.CreateBitCast(value, input->getType()); // For everything else, we can just do a simple increment. } else { llvm::Value *amt = Builder.getInt32(amount); if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.ptr"); else - value = CGF.EmitCheckedInBoundsGEP(value, amt, E->getExprLoc(), - "incdec.ptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, signedIndex, + E->getExprLoc(), "incdec.ptr"); } // Vector increment/decrement. @@ -2043,8 +2044,8 @@ if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, sizeValue, "incdec.objptr"); else - value = CGF.EmitCheckedInBoundsGEP(value, sizeValue, E->getExprLoc(), - "incdec.objptr"); + value = CGF.EmitCheckedInBoundsGEP(value, sizeValue, signedIndex, + E->getExprLoc(), "incdec.objptr"); value = Builder.CreateBitCast(value, input->getType()); } @@ -2661,13 +2662,14 @@ std::swap(pointerOperand, indexOperand); } + bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType(); + unsigned width = cast<llvm::IntegerType>(index->getType())->getBitWidth(); auto &DL = CGF.CGM.getDataLayout(); auto PtrTy = cast<llvm::PointerType>(pointer->getType()); if (width != DL.getTypeSizeInBits(PtrTy)) { // Zero-extend or sign-extend the pointer value according to // whether the index is signed or not. - bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType(); index = CGF.Builder.CreateIntCast(index, DL.getIntPtrType(PtrTy), isSigned, "idx.ext"); } @@ -2711,8 +2713,8 @@ pointer = CGF.Builder.CreateGEP(pointer, index, "add.ptr"); } else { index = CGF.Builder.CreateNSWMul(index, numElements, "vla.index"); - pointer = CGF.EmitCheckedInBoundsGEP(pointer, index, op.E->getExprLoc(), - "add.ptr"); + pointer = CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, + op.E->getExprLoc(), "add.ptr"); } return pointer; } @@ -2729,8 +2731,8 @@ if (CGF.getLangOpts().isSignedOverflowDefined()) return CGF.Builder.CreateGEP(pointer, index, "add.ptr"); - return CGF.EmitCheckedInBoundsGEP(pointer, index, op.E->getExprLoc(), - "add.ptr"); + return CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, + op.E->getExprLoc(), "add.ptr"); } // Construct an fmuladd intrinsic to represent a fused mul-add of MulOp and @@ -3848,6 +3850,7 @@ Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList, + bool SignedIndices, SourceLocation Loc, const Twine &Name) { Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name); @@ -3905,7 +3908,7 @@ auto *ResultAndOverflow = Builder.CreateCall( (Opcode == BO_Add) ? SAddIntrinsic : SMulIntrinsic, {LHS, RHS}); OffsetOverflows = Builder.CreateOr( - OffsetOverflows, Builder.CreateExtractValue(ResultAndOverflow, 1)); + Builder.CreateExtractValue(ResultAndOverflow, 1), OffsetOverflows); return Builder.CreateExtractValue(ResultAndOverflow, 0); }; @@ -3951,12 +3954,18 @@ // 1) The total offset doesn't overflow, and // 2) The sign of the difference between the computed address and the base // pointer matches the sign of the total offset. - llvm::Value *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); - llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); - auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero); - llvm::Value *ValidGEP = Builder.CreateAnd( - Builder.CreateNot(OffsetOverflows), - Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid)); + llvm::Value *ValidGEP; + auto *NoOffsetOverflow = Builder.CreateNot(OffsetOverflows); + auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); + if (SignedIndices) { + auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero); + llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); + ValidGEP = Builder.CreateAnd( + Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid), + NoOffsetOverflow); + } else { + ValidGEP = Builder.CreateAnd(PosOrZeroValid, NoOffsetOverflow); + } llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)}; // Pass the computed GEP to the runtime to avoid emitting poisoned arguments. Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -3002,10 +3002,11 @@ llvm::Value *ptr, ArrayRef<llvm::Value*> indices, bool inbounds, + bool signedIndices, SourceLocation loc, const llvm::Twine &name = "arrayidx") { if (inbounds) { - return CGF.EmitCheckedInBoundsGEP(ptr, indices, loc, name); + return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices, loc, name); } else { return CGF.Builder.CreateGEP(ptr, indices, name); } @@ -3038,7 +3039,7 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, ArrayRef<llvm::Value *> indices, QualType eltType, bool inbounds, - SourceLocation loc, + bool signedIndices, SourceLocation loc, const llvm::Twine &name = "arrayidx") { // All the indices except that last must be zero. #ifndef NDEBUG @@ -3058,8 +3059,8 @@ CharUnits eltAlign = getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize); - llvm::Value *eltPtr = - emitArraySubscriptGEP(CGF, addr.getPointer(), indices, inbounds, loc, name); + llvm::Value *eltPtr = emitArraySubscriptGEP( + CGF, addr.getPointer(), indices, inbounds, signedIndices, loc, name); return Address(eltPtr, eltAlign); } @@ -3069,6 +3070,7 @@ // in lexical order (this complexity is, sadly, required by C++17). llvm::Value *IdxPre = (E->getLHS() == E->getIdx()) ? EmitScalarExpr(E->getIdx()) : nullptr; + bool SignedIndices = false; auto EmitIdxAfterBase = [&, IdxPre](bool Promote) -> llvm::Value * { auto *Idx = IdxPre; if (E->getLHS() != E->getIdx()) { @@ -3078,6 +3080,7 @@ QualType IdxTy = E->getIdx()->getType(); bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType(); + SignedIndices |= IdxSigned; if (SanOpts.has(SanitizerKind::ArrayBounds)) EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, Accessed); @@ -3113,7 +3116,7 @@ QualType EltType = LV.getType()->castAs<VectorType>()->getElementType(); Addr = emitArraySubscriptGEP(*this, Addr, Idx, EltType, /*inbounds*/ true, - E->getExprLoc()); + SignedIndices, E->getExprLoc()); return MakeAddrLValue(Addr, EltType, LV.getBaseInfo()); } @@ -3142,7 +3145,7 @@ Addr = emitArraySubscriptGEP(*this, Addr, Idx, vla->getElementType(), !getLangOpts().isSignedOverflowDefined(), - E->getExprLoc()); + SignedIndices, E->getExprLoc()); } else if (const ObjCObjectType *OIT = E->getType()->getAs<ObjCObjectType>()){ // Indexing over an interface, as in "NSString *P; P[4];" @@ -3167,8 +3170,9 @@ // Do the GEP. CharUnits EltAlign = getArrayElementAlign(Addr.getAlignment(), Idx, InterfaceSize); - llvm::Value *EltPtr = emitArraySubscriptGEP( - *this, Addr.getPointer(), ScaledIdx, false, E->getExprLoc()); + llvm::Value *EltPtr = + emitArraySubscriptGEP(*this, Addr.getPointer(), ScaledIdx, false, + SignedIndices, E->getExprLoc()); Addr = Address(EltPtr, EltAlign); // Cast back. @@ -3190,19 +3194,18 @@ auto *Idx = EmitIdxAfterBase(/*Promote*/true); // Propagate the alignment from the array itself to the result. - Addr = emitArraySubscriptGEP(*this, ArrayLV.getAddress(), - {CGM.getSize(CharUnits::Zero()), Idx}, - E->getType(), - !getLangOpts().isSignedOverflowDefined(), - E->getExprLoc()); + Addr = emitArraySubscriptGEP( + *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, + E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices, + E->getExprLoc()); BaseInfo = ArrayLV.getBaseInfo(); } else { // The base must be a pointer; emit it with an estimate of its alignment. Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); Addr = emitArraySubscriptGEP(*this, Addr, Idx, E->getType(), !getLangOpts().isSignedOverflowDefined(), - E->getExprLoc()); + SignedIndices, E->getExprLoc()); } LValue LV = MakeAddrLValue(Addr, E->getType(), BaseInfo); @@ -3375,7 +3378,7 @@ Idx = Builder.CreateNSWMul(Idx, NumElements); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(), !getLangOpts().isSignedOverflowDefined(), - E->getExprLoc()); + /*SignedIndices=*/false, E->getExprLoc()); } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) { // If this is A[i] where A is an array, the frontend will have decayed the // base to be a ArrayToPointerDecay implicit cast. While correct, it is @@ -3395,14 +3398,14 @@ EltPtr = emitArraySubscriptGEP( *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, ResultExprTy, !getLangOpts().isSignedOverflowDefined(), - E->getExprLoc()); + /*SignedIndices=*/false, E->getExprLoc()); BaseInfo = ArrayLV.getBaseInfo(); } else { Address Base = emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, BaseTy, ResultExprTy, IsLowerBound); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy, !getLangOpts().isSignedOverflowDefined(), - E->getExprLoc()); + /*SignedIndices=*/false, E->getExprLoc()); } return MakeAddrLValue(EltPtr, ResultExprTy, BaseInfo);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits