Hi Sanjay,

On 32-bit ARM your patch appears to increase code size of BZ2_decompress from 
SPEC2006’s 401.bzip2 by 50% — from 7.5K to 11K.  This increases overall code 
size of 401.bzip2 benchmark by 10%.

Would you please investigate?

Please let us know if you need help reproducing the problem.

Regards,

--
Maxim Kuvyrkov
https://www.linaro.org

> On 14 Jul 2021, at 17:32, ci_not...@linaro.org wrote:
> 
> Successfully identified regression in *llvm* in CI configuration 
> tcwg_bmk_llvm_tk1/llvm-master-arm-spec2k6-Os.  So far, this commit has 
> regressed CI configurations:
> - tcwg_bmk_llvm_tk1/llvm-master-arm-spec2k6-Os
> 
> Culprit:
> <cut>
> commit 40b752d28d95158e52dba7cfeea92e41b7ccff9a
> Author: Sanjay Patel <spa...@rotateright.com>
> Date:   Mon Jul 5 09:57:39 2021 -0400
> 
>    [InstCombine] fold icmp slt/sgt of offset value with constant
> 
>    This follows up patches for the unsigned siblings:
>    0c400e895306
>    c7b658aeb526
> 
>    We are translating an offset signed compare to its
>    unsigned equivalent when one end of the range is
>    at the limit (zero or unsigned max).
> 
>    (X + C2) >s C --> X <u (SMAX - C) (if C == C2 - 1)
>    (X + C2) <s C --> X >u (C ^ SMAX) (if C == C2)
> 
>    This probably does not show up much in IR derived
>    from C/C++ source because that would likely have
>    'nsw', and we have folds for that already.
> 
>    As with the previous unsigned transforms, the folds
>    could be generalized to handle non-constant patterns:
> 
>    https://alive2.llvm.org/ce/z/Y8Xrrm
> 
>      ; sgt
>      define i1 @src(i8 %a, i8 %c) {
>        %c2 = add i8 %c, 1
>        %t = add i8 %a, %c2
>        %ov = icmp sgt i8 %t, %c
>        ret i1 %ov
>      }
> 
>      define i1 @tgt(i8 %a, i8 %c) {
>        %c_off = sub i8 127, %c ; SMAX
>        %ov = icmp ult i8 %a, %c_off
>        ret i1 %ov
>      }
> 
>    https://alive2.llvm.org/ce/z/c8uhnk
> 
>      ; slt
>      define i1 @src(i8 %a, i8 %c) {
>        %t = add i8 %a, %c
>        %ov = icmp slt i8 %t, %c
>        ret i1 %ov
>      }
> 
>      define i1 @tgt(i8 %a, i8 %c) {
>        %c_offnot = xor i8 %c, 127 ; SMAX
>        %ov = icmp ugt i8 %a, %c_offnot
>        ret i1 %ov
>      }
> </cut>
> 
> Results regressed to (for first_bad == 
> 40b752d28d95158e52dba7cfeea92e41b7ccff9a)
> # reset_artifacts:
> -10
> # build_abe binutils:
> -9
> # build_abe stage1 -- --set gcc_override_configure=--with-mode=thumb --set 
> gcc_override_configure=--disable-libsanitizer:
> -8
> # build_abe linux:
> -7
> # build_abe glibc:
> -6
> # build_abe stage2 -- --set gcc_override_configure=--with-mode=thumb --set 
> gcc_override_configure=--disable-libsanitizer:
> -5
> # build_llvm true:
> -3
> # true:
> 0
> # benchmark -Os_mthumb -- 
> artifacts/build-40b752d28d95158e52dba7cfeea92e41b7ccff9a/results_id:
> 1
> # 401.bzip2,bzip2_base.default                                  regressed by 
> 110
> # 401.bzip2,[.] BZ2_decompress                                  regressed by 
> 149
> 
> from (for last_good == 32dd914f7182875730eb3453f39dcc584b7219b2)
> # reset_artifacts:
> -10
> # build_abe binutils:
> -9
> # build_abe stage1 -- --set gcc_override_configure=--with-mode=thumb --set 
> gcc_override_configure=--disable-libsanitizer:
> -8
> # build_abe linux:
> -7
> # build_abe glibc:
> -6
> # build_abe stage2 -- --set gcc_override_configure=--with-mode=thumb --set 
> gcc_override_configure=--disable-libsanitizer:
> -5
> # build_llvm true:
> -3
> # true:
> 0
> # benchmark -Os_mthumb -- 
> artifacts/build-32dd914f7182875730eb3453f39dcc584b7219b2/results_id:
> 1
> 
> Artifacts of last_good build: 
> https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-arm-spec2k6-Os/9/artifact/artifacts/build-32dd914f7182875730eb3453f39dcc584b7219b2/
> Results ID of last_good: 
> tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-master-arm-spec2k6-Os/1631
> Artifacts of first_bad build: 
> https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-arm-spec2k6-Os/9/artifact/artifacts/build-40b752d28d95158e52dba7cfeea92e41b7ccff9a/
> Results ID of first_bad: 
> tk1_32/tcwg_bmk_llvm_tk1/bisect-llvm-master-arm-spec2k6-Os/1622
> Build top page/logs: 
> https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-arm-spec2k6-Os/9/
> 
> Configuration details:
> 
> 
> Reproduce builds:
> <cut>
> mkdir investigate-llvm-40b752d28d95158e52dba7cfeea92e41b7ccff9a
> cd investigate-llvm-40b752d28d95158e52dba7cfeea92e41b7ccff9a
> 
> git clone https://git.linaro.org/toolchain/jenkins-scripts
> 
> mkdir -p artifacts/manifests
> curl -o artifacts/manifests/build-baseline.sh 
> https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-arm-spec2k6-Os/9/artifact/artifacts/manifests/build-baseline.sh
>  --fail
> curl -o artifacts/manifests/build-parameters.sh 
> https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-arm-spec2k6-Os/9/artifact/artifacts/manifests/build-parameters.sh
>  --fail
> curl -o artifacts/test.sh 
> https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-arm-spec2k6-Os/9/artifact/artifacts/test.sh
>  --fail
> chmod +x artifacts/test.sh
> 
> # Reproduce the baseline build (build all pre-requisites)
> ./jenkins-scripts/tcwg_bmk-build.sh @@ artifacts/manifests/build-baseline.sh
> 
> # Save baseline build state (which is then restored in artifacts/test.sh)
> rsync -a --del --delete-excluded --exclude bisect/ --exclude artifacts/ 
> --exclude llvm/ ./ ./bisect/baseline/
> 
> cd llvm
> 
> # Reproduce first_bad build
> git checkout --detach 40b752d28d95158e52dba7cfeea92e41b7ccff9a
> ../artifacts/test.sh
> 
> # Reproduce last_good build
> git checkout --detach 32dd914f7182875730eb3453f39dcc584b7219b2
> ../artifacts/test.sh
> 
> cd ..
> </cut>
> 
> History of pending regressions and results: 
> https://git.linaro.org/toolchain/ci/base-artifacts.git/log/?h=linaro-local/ci/tcwg_bmk_llvm_tk1/llvm-master-arm-spec2k6-Os
> 
> Artifacts: 
> https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-arm-spec2k6-Os/9/artifact/artifacts/
> Build log: 
> https://ci.linaro.org/job/tcwg_bmk_ci_llvm-bisect-tcwg_bmk_tk1-llvm-master-arm-spec2k6-Os/9/consoleText
> 
> Full commit (up to 1000 lines):
> <cut>
> commit 40b752d28d95158e52dba7cfeea92e41b7ccff9a
> Author: Sanjay Patel <spa...@rotateright.com>
> Date:   Mon Jul 5 09:57:39 2021 -0400
> 
>    [InstCombine] fold icmp slt/sgt of offset value with constant
> 
>    This follows up patches for the unsigned siblings:
>    0c400e895306
>    c7b658aeb526
> 
>    We are translating an offset signed compare to its
>    unsigned equivalent when one end of the range is
>    at the limit (zero or unsigned max).
> 
>    (X + C2) >s C --> X <u (SMAX - C) (if C == C2 - 1)
>    (X + C2) <s C --> X >u (C ^ SMAX) (if C == C2)
> 
>    This probably does not show up much in IR derived
>    from C/C++ source because that would likely have
>    'nsw', and we have folds for that already.
> 
>    As with the previous unsigned transforms, the folds
>    could be generalized to handle non-constant patterns:
> 
>    https://alive2.llvm.org/ce/z/Y8Xrrm
> 
>      ; sgt
>      define i1 @src(i8 %a, i8 %c) {
>        %c2 = add i8 %c, 1
>        %t = add i8 %a, %c2
>        %ov = icmp sgt i8 %t, %c
>        ret i1 %ov
>      }
> 
>      define i1 @tgt(i8 %a, i8 %c) {
>        %c_off = sub i8 127, %c ; SMAX
>        %ov = icmp ult i8 %a, %c_off
>        ret i1 %ov
>      }
> 
>    https://alive2.llvm.org/ce/z/c8uhnk
> 
>      ; slt
>      define i1 @src(i8 %a, i8 %c) {
>        %t = add i8 %a, %c
>        %ov = icmp slt i8 %t, %c
>        ret i1 %ov
>      }
> 
>      define i1 @tgt(i8 %a, i8 %c) {
>        %c_offnot = xor i8 %c, 127 ; SMAX
>        %ov = icmp ugt i8 %a, %c_offnot
>        ret i1 %ov
>      }
> ---
> .../Transforms/InstCombine/InstCombineCompares.cpp  | 21 ++++++++++++++-------
> llvm/test/Transforms/InstCombine/icmp-add.ll        | 20 ++++++++++----------
> 2 files changed, 24 insertions(+), 17 deletions(-)
> 
> diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp 
> b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
> index 6bd479def210..6e66c61f5e46 100644
> --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
> +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
> @@ -2636,20 +2636,27 @@ Instruction 
> *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
>   // Fold icmp pred (add X, C2), C.
>   Value *X = Add->getOperand(0);
>   Type *Ty = Add->getType();
> -  CmpInst::Predicate Pred = Cmp.getPredicate();
> +  const CmpInst::Predicate Pred = Cmp.getPredicate();
> +  const APInt SMax = APInt::getSignedMaxValue(Ty->getScalarSizeInBits());
> +  const APInt SMin = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
> 
> -  // Fold an unsigned compare with offset to signed compare:
> +  // Fold compare with offset to opposite sign compare if it eliminates 
> offset:
>   // (X + C2) >u C --> X <s -C2 (if C == C2 + SMAX)
> -  // TODO: Find the signed predicate siblings.
> -  if (Pred == CmpInst::ICMP_UGT &&
> -      C == *C2 + APInt::getSignedMaxValue(Ty->getScalarSizeInBits()))
> +  if (Pred == CmpInst::ICMP_UGT && C == *C2 + SMax)
>     return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, -(*C2)));
> 
>   // (X + C2) <u C --> X >s ~C2 (if C == C2 + SMIN)
> -  if (Pred == CmpInst::ICMP_ULT &&
> -      C == *C2 + APInt::getSignedMinValue(Ty->getScalarSizeInBits()))
> +  if (Pred == CmpInst::ICMP_ULT && C == *C2 + SMin)
>     return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantInt::get(Ty, ~(*C2)));
> 
> +  // (X + C2) >s C --> X <u (SMAX - C) (if C == C2 - 1)
> +  if (Pred == CmpInst::ICMP_SGT && C == *C2 - 1)
> +    return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, SMax - 
> C));
> +
> +  // (X + C2) <s C --> X >u (C ^ SMAX) (if C == C2)
> +  if (Pred == CmpInst::ICMP_SLT && C == *C2)
> +    return new ICmpInst(ICmpInst::ICMP_UGT, X, ConstantInt::get(Ty, C ^ 
> SMax));
> +
>   // If the add does not wrap, we can always adjust the compare by subtracting
>   // the constants. Equality comparisons are handled elsewhere. 
> SGE/SLE/UGE/ULE
>   // are canonicalized to SGT/SLT/UGT/ULT.
> diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll 
> b/llvm/test/Transforms/InstCombine/icmp-add.ll
> index 1f00dc6e2992..aa69325d716c 100644
> --- a/llvm/test/Transforms/InstCombine/icmp-add.ll
> +++ b/llvm/test/Transforms/InstCombine/icmp-add.ll
> @@ -842,8 +842,7 @@ define i1 @ult_wrong_offset(i8 %a) {
> 
> define i1 @sgt_offset(i8 %a) {
> ; CHECK-LABEL: @sgt_offset(
> -; CHECK-NEXT:    [[T:%.*]] = add i8 [[A:%.*]], -6
> -; CHECK-NEXT:    [[OV:%.*]] = icmp sgt i8 [[T]], -7
> +; CHECK-NEXT:    [[OV:%.*]] = icmp ult i8 [[A:%.*]], -122
> ; CHECK-NEXT:    ret i1 [[OV]]
> ;
>   %t = add i8 %a, -6
> @@ -855,7 +854,7 @@ define i1 @sgt_offset_use(i32 %a) {
> ; CHECK-LABEL: @sgt_offset_use(
> ; CHECK-NEXT:    [[T:%.*]] = add i32 [[A:%.*]], 42
> ; CHECK-NEXT:    call void @use(i32 [[T]])
> -; CHECK-NEXT:    [[OV:%.*]] = icmp sgt i32 [[T]], 41
> +; CHECK-NEXT:    [[OV:%.*]] = icmp ult i32 [[A]], 2147483606
> ; CHECK-NEXT:    ret i1 [[OV]]
> ;
>   %t = add i32 %a, 42
> @@ -866,8 +865,7 @@ define i1 @sgt_offset_use(i32 %a) {
> 
> define <2 x i1> @sgt_offset_splat(<2 x i5> %a) {
> ; CHECK-LABEL: @sgt_offset_splat(
> -; CHECK-NEXT:    [[T:%.*]] = add <2 x i5> [[A:%.*]], <i5 9, i5 9>
> -; CHECK-NEXT:    [[OV:%.*]] = icmp sgt <2 x i5> [[T]], <i5 8, i5 8>
> +; CHECK-NEXT:    [[OV:%.*]] = icmp ult <2 x i5> [[A:%.*]], <i5 7, i5 7>
> ; CHECK-NEXT:    ret <2 x i1> [[OV]]
> ;
>   %t = add <2 x i5> %a, <i5 9, i5 9>
> @@ -875,6 +873,8 @@ define <2 x i1> @sgt_offset_splat(<2 x i5> %a) {
>   ret <2 x i1> %ov
> }
> 
> +; negative test - constants must differ by 1
> +
> define i1 @sgt_wrong_offset(i8 %a) {
> ; CHECK-LABEL: @sgt_wrong_offset(
> ; CHECK-NEXT:    [[T:%.*]] = add i8 [[A:%.*]], -7
> @@ -888,8 +888,7 @@ define i1 @sgt_wrong_offset(i8 %a) {
> 
> define i1 @slt_offset(i8 %a) {
> ; CHECK-LABEL: @slt_offset(
> -; CHECK-NEXT:    [[T:%.*]] = add i8 [[A:%.*]], -6
> -; CHECK-NEXT:    [[OV:%.*]] = icmp slt i8 [[T]], -6
> +; CHECK-NEXT:    [[OV:%.*]] = icmp ugt i8 [[A:%.*]], -123
> ; CHECK-NEXT:    ret i1 [[OV]]
> ;
>   %t = add i8 %a, -6
> @@ -901,7 +900,7 @@ define i1 @slt_offset_use(i32 %a) {
> ; CHECK-LABEL: @slt_offset_use(
> ; CHECK-NEXT:    [[T:%.*]] = add i32 [[A:%.*]], 42
> ; CHECK-NEXT:    call void @use(i32 [[T]])
> -; CHECK-NEXT:    [[OV:%.*]] = icmp slt i32 [[T]], 42
> +; CHECK-NEXT:    [[OV:%.*]] = icmp ugt i32 [[A]], 2147483605
> ; CHECK-NEXT:    ret i1 [[OV]]
> ;
>   %t = add i32 %a, 42
> @@ -912,8 +911,7 @@ define i1 @slt_offset_use(i32 %a) {
> 
> define <2 x i1> @slt_offset_splat(<2 x i5> %a) {
> ; CHECK-LABEL: @slt_offset_splat(
> -; CHECK-NEXT:    [[T:%.*]] = add <2 x i5> [[A:%.*]], <i5 9, i5 9>
> -; CHECK-NEXT:    [[OV:%.*]] = icmp slt <2 x i5> [[T]], <i5 9, i5 9>
> +; CHECK-NEXT:    [[OV:%.*]] = icmp ugt <2 x i5> [[A:%.*]], <i5 6, i5 6>
> ; CHECK-NEXT:    ret <2 x i1> [[OV]]
> ;
>   %t = add <2 x i5> %a, <i5 9, i5 9>
> @@ -921,6 +919,8 @@ define <2 x i1> @slt_offset_splat(<2 x i5> %a) {
>   ret <2 x i1> %ov
> }
> 
> +; negative test - constants must be equal
> +
> define i1 @slt_wrong_offset(i8 %a) {
> ; CHECK-LABEL: @slt_wrong_offset(
> ; CHECK-NEXT:    [[T:%.*]] = add i8 [[A:%.*]], -6
> </cut>

_______________________________________________
linaro-toolchain mailing list
linaro-toolchain@lists.linaro.org
https://lists.linaro.org/mailman/listinfo/linaro-toolchain

Reply via email to