[llvm-branch-commits] [llvm] DAG: Call SimplifyDemandedBits on copysign value operand (PR #97180)

2024-06-30 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/97180


[llvm-branch-commits] [CodeGen] Add dump() to MachineTraceMetrics.h (PR #97799)

2024-07-05 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/97799


[llvm-branch-commits] [llvm] TTI: Check legalization cost of min/max ISD nodes (PR #100514)

2024-07-25 Thread Simon Pilgrim via llvm-branch-commits


@@ -42,75 +42,50 @@ define i32 @umax(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> un

[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)

2024-07-26 Thread Simon Pilgrim via llvm-branch-commits

RKSimon wrote:

I'm not sure whether it's better to focus on removing some of the custom lowering (and improving TargetLowering::expandFP_TO_INT_SAT) or to just add better cost-table support.

https://github.com/llvm/llvm-project/pull/100521
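
For reference, the cost-table route mentioned above could look roughly like the sketch below. It is an illustrative assumption rather than code from the PR: the table name, entries, and cost values are invented; only CostTblEntry/CostTableLookup themselves are existing LLVM helpers.

```
// Hypothetical sketch: answer fptosi_sat/fptoui_sat cost queries from a
// static table inside X86TTIImpl instead of mirroring the custom lowering.
// The entries and costs below are made-up examples, not measured values.
static const CostTblEntry FPToIntSatCostTbl[] = {
    {ISD::FP_TO_SINT_SAT, MVT::v4i32, 1}, // assumed: single instruction
    {ISD::FP_TO_UINT_SAT, MVT::v4i32, 3}, // assumed: short expansion
};

InstructionCost lookupFPToIntSatCost(unsigned ISDOpcode, MVT VT) {
  if (const CostTblEntry *Entry =
          CostTableLookup(FPToIntSatCostTbl, ISDOpcode, VT))
    return Entry->Cost;
  return InstructionCost::getInvalid(); // fall back to generic costing
}
```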


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-08-06 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.

LGTM - although I don't think we have any legal/custom cost test coverage (only x86, which expands).

https://github.com/llvm/llvm-project/pull/100520


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-08-06 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/100519


[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-08-07 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/100523


[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)

2024-08-07 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.

LGTM - the x86 lowering needs reworking but that shouldn't hold this PR up.

https://github.com/llvm/llvm-project/pull/100521


[llvm-branch-commits] [llvm] Backport [DAGCombine] Fix multi-use miscompile in load combine (#81586) (PR #81633)

2024-02-16 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.

LGTM for backport

https://github.com/llvm/llvm-project/pull/81633


[llvm-branch-commits] [llvm] release/18.x: [SelectionDAG] Change computeAliasing signature from optional to LocationSize. (#83017) (PR #83848)

2024-03-04 Thread Simon Pilgrim via llvm-branch-commits

RKSimon wrote:

@davemgreen Are there further patches for scalable types coming, or is this just to address the ~UINT64_T(0) bugfix?

https://github.com/llvm/llvm-project/pull/83848


[llvm-branch-commits] [llvm] release/18.x: [RISCV] Add test for aliasing miscompile fixed by #83017. NFC (PR #83856)

2024-03-05 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/83856


[llvm-branch-commits] [llvm] release/18.x: [X86] Resolve FIXME: Enable PC relative calls on Windows (PR #84185)

2024-03-06 Thread Simon Pilgrim via llvm-branch-commits

RKSimon wrote:

Now that 18.1 has been released, we shouldn't be merging anything that isn't a regression from 17.x.

I've tried to find the release policy for this, in case 18.2 now allows further merges, but I can't find anything.

https://github.com/llvm/llvm-project/pull/84185


[llvm-branch-commits] [clang] release/18.x [X86_64] fix SSE type error in vaarg (PR #86698)

2024-04-16 Thread Simon Pilgrim via llvm-branch-commits

RKSimon wrote:

What are the current rules on cherry-picks for old bugs? AFAICT this patch wasn't fixing a bug introduced in the 17.x-18.x development window.

https://github.com/llvm/llvm-project/pull/86698


[llvm-branch-commits] [llvm] release/18.x: [X86][EVEX512] Check hasEVEX512 for canExtendTo512DQ (#90390) (PR #90422)

2024-04-29 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/90422


[llvm-branch-commits] [llvm] release/18.x: [X86][EVEX512] Add `HasEVEX512` when `NoVLX` used for 512-bit patterns (#91106) (PR #91118)

2024-05-05 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/91118


[llvm-branch-commits] [llvm] release/18.x: [X86][FP16] Do not create VBROADCAST_LOAD for f16 without AVX2 (#91125) (PR #91161)

2024-05-07 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/91161


[llvm-branch-commits] [llvm] release/18.x: [X86][FP16] Do not create VBROADCAST_LOAD for f16 without AVX2 (#91125) (PR #91425)

2024-05-08 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/91425


[llvm-branch-commits] [llvm] release/18.x: [DAGCombiner] In mergeTruncStore, make sure we aren't storing shifted in bits. (#90939) (PR #91038)

2024-05-27 Thread Simon Pilgrim via llvm-branch-commits

RKSimon wrote:

> > @AtariDreams I've noticed you've filed a lot of backport requests. How are you choosing which fixes to backport? Is there a specific use case you care about?
> 
> There's a particular LLVM miscompile bug in WebKit I'm trying to figure out. It's been there since 2019. The backports are literally just about avoiding miscompilations.

@AtariDreams Has the bug disappeared in LLVM trunk, and do you think a recent commit has fixed or hidden it? Has this bug been reported to either WebKit or LLVM so we can track it, please? Have you been able to confirm whether it's an LLVM bug or UB in WebKit?

https://github.com/llvm/llvm-project/pull/91038


[llvm-branch-commits] [llvm] abc60e9 - [X86] vec_fabs.ll - add SSE test coverage

2023-11-30 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2023-11-30T10:07:00Z
New Revision: abc60e9808820c3f6614e6815909d43ed085460e

URL: 
https://github.com/llvm/llvm-project/commit/abc60e9808820c3f6614e6815909d43ed085460e
DIFF: 
https://github.com/llvm/llvm-project/commit/abc60e9808820c3f6614e6815909d43ed085460e.diff

LOG: [X86] vec_fabs.ll - add SSE test coverage

Added: 


Modified: 
llvm/test/CodeGen/X86/vec_fabs.ll

Removed: 




diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll
index ec02dfda30c8502..c17341c2c8b077e 100644
--- a/llvm/test/CodeGen/X86/vec_fabs.ll
+++ b/llvm/test/CodeGen/X86/vec_fabs.ll
@@ -1,24 +1,31 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX2
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512,X86-AVX512VL
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X86,X86-AVX512,X86-AVX512FP16
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512,X86-AVX512VLDQ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512VL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512FP16
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512VLDQ
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1OR2,X86-AVX1
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1OR2,X86-AVX2
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512VL
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512FP16
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512VLDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64,X64-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1OR2,X64-AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1OR2,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512FP16
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512VLDQ
 
 ;
 ; 128-bit Vectors
 ;
 
-define <2 x double> @fabs_v2f64(<2 x double> %p) {
-; X86-AVX-LABEL: fabs_v2f64:
-; X86-AVX:   # %bb.0:
-; X86-AVX-NEXT:vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT:retl
+define <2 x double> @fabs_v2f64(<2 x double> %p) nounwind {
+; X86-SSE-LABEL: fabs_v2f64:
+; X86-SSE:   # %bb.0:
+; X86-SSE-NEXT:andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT:retl
+;
+; X86-AVX1OR2-LABEL: fabs_v2f64:
+; X86-AVX1OR2:   # %bb.0:
+; X86-AVX1OR2-NEXT:vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1OR2-NEXT:retl
 ;
 ; X86-AVX512VL-LABEL: fabs_v2f64:
 ; X86-AVX512VL:   # %bb.0:
@@ -35,10 +42,15 @@ define <2 x double> @fabs_v2f64(<2 x double> %p) {
 ; X86-AVX512VLDQ-NEXT:vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
 ; X86-AVX512VLDQ-NEXT:retl
 ;
-; X64-AVX-LABEL: fabs_v2f64:
-; X64-AVX:   # %bb.0:
-; X64-AVX-NEXT:vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:retq
+; X64-SSE-LABEL: fabs_v2f64:
+; X64-SSE:   # %bb.0:
+; X64-SSE-NEXT:andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE-NEXT:retq
+;
+; X64-AVX1OR2-LABEL: fabs_v2f64:
+; X64-AVX1OR2:   # %bb.0:
+; X64-AVX1OR2-NEXT:vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT:retq
 ;
 ; X64-AVX512VL-LABEL: fabs_v2f64:
 ; X64-AVX512VL:   

[llvm-branch-commits] [llvm] [llvm-exegesis] Add additional validation counters (PR #76788)

2024-01-03 Thread Simon Pilgrim via llvm-branch-commits


@@ -121,7 +121,12 @@ def HaswellPfmCounters : ProcPfmCounters {
 PfmIssueCounter<"HWPort7", "uops_executed_port:port_7">
   ];
   let ValidationCounters = [
-PfmValidationCounter
+PfmValidationCounter,
+PfmValidationCounter,
+PfmValidationCounter,
+PfmValidationCounter,
+PfmValidationCounter,
+PfmValidationCounter
   ];

RKSimon wrote:

Could we pull this out into a default list instead of duplicating it? `let ValidationCounters = DefaultX86ValidationCounters` or something?

https://github.com/llvm/llvm-project/pull/76788


[llvm-branch-commits] [llvm] [llvm-exegesis] Add additional validation counters (PR #76788)

2024-01-05 Thread Simon Pilgrim via llvm-branch-commits

RKSimon wrote:

Thanks, no more comments from me - but an exegesis owner should review the rest.

https://github.com/llvm/llvm-project/pull/76788


[llvm-branch-commits] [clang] Backport "[Clang][CodeGen] Fix type for atomic float incdec operators (#107075)" (PR #107184)

2024-09-05 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.


https://github.com/llvm/llvm-project/pull/107184


[llvm-branch-commits] [clang] [compiler-rt] [llvm] [X86] AMD Zen 5 Initial enablement (PR #108816)

2024-09-16 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.


https://github.com/llvm/llvm-project/pull/108816


[llvm-branch-commits] [clang] Backport "[Clang][CodeGen] Fix type for atomic float incdec operators (#107075)" (PR #107184)

2024-09-16 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.


https://github.com/llvm/llvm-project/pull/107184


[llvm-branch-commits] [clang] [compiler-rt] [llvm] [X86] AMD Zen 5 Initial enablement (PR #108816)

2024-09-17 Thread Simon Pilgrim via llvm-branch-commits

RKSimon wrote:

LLVM Release Notes:
```
Changes to the X86 Backend
-mcpu=znver5 is now supported.
```
Clang Release Notes:
```
X86 Support
-march=znver5 is now supported
```
@ganeshgit Are you happy with this?

https://github.com/llvm/llvm-project/pull/108816


[llvm-branch-commits] [llvm] 344afa8 - [Support] TrigramIndex::insert - pass std::string argument by const reference. NFCI.

2021-01-23 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-23T11:04:00Z
New Revision: 344afa853fcfcc085cb5c957b4a07c7ea013bb1b

URL: 
https://github.com/llvm/llvm-project/commit/344afa853fcfcc085cb5c957b4a07c7ea013bb1b
DIFF: 
https://github.com/llvm/llvm-project/commit/344afa853fcfcc085cb5c957b4a07c7ea013bb1b.diff

LOG: [Support] TrigramIndex::insert - pass std::string argument by const reference. NFCI.

Avoid string copies and fix clang-tidy warning.

Added: 


Modified: 
llvm/include/llvm/Support/TrigramIndex.h
llvm/lib/Support/TrigramIndex.cpp

Removed: 




diff --git a/llvm/include/llvm/Support/TrigramIndex.h b/llvm/include/llvm/Support/TrigramIndex.h
index 360ab9459790..0be6a1012718 100644
--- a/llvm/include/llvm/Support/TrigramIndex.h
+++ b/llvm/include/llvm/Support/TrigramIndex.h
@@ -38,7 +38,7 @@ class StringRef;
 class TrigramIndex {
  public:
   /// Inserts a new Regex into the index.
-  void insert(std::string Regex);
+  void insert(const std::string &Regex);
 
   /// Returns true, if special case list definitely does not have a line
   /// that matches the query. Returns false, if it's not sure.

diff --git a/llvm/lib/Support/TrigramIndex.cpp b/llvm/lib/Support/TrigramIndex.cpp
index 1f1f3022b0b3..4370adc9c3e0 100644
--- a/llvm/lib/Support/TrigramIndex.cpp
+++ b/llvm/lib/Support/TrigramIndex.cpp
@@ -25,7 +25,7 @@ static bool isAdvancedMetachar(unsigned Char) {
   return strchr(RegexAdvancedMetachars, Char) != nullptr;
 }
 
-void TrigramIndex::insert(std::string Regex) {
+void TrigramIndex::insert(const std::string &Regex) {
   if (Defeated) return;
   std::set Was;
   unsigned Cnt = 0;
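
As a standalone illustration of why this signature change matters (toy code with hypothetical names, not the patch itself):

```
#include <string>

// Pass-by-value: the caller's string is copy-constructed on every call.
void insertByValue(std::string Regex) { /* use Regex */ }

// Pass-by-const-reference: binds directly to the caller's string, no copy.
void insertByRef(const std::string &Regex) { /* use Regex */ }

int main() {
  std::string Pattern = "ab.*cd";
  insertByValue(Pattern); // allocates and copies the buffer
  insertByRef(Pattern);   // no allocation, same observable behaviour
}
```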





[llvm-branch-commits] [llvm] 9641bd0 - [TableGen] RuleMatcher::defineComplexSubOperand avoid std::string copy. NFCI.

2021-01-25 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-25T11:35:44Z
New Revision: 9641bd0f87dda34c09c606358bb0cb08a641a4f6

URL: 
https://github.com/llvm/llvm-project/commit/9641bd0f87dda34c09c606358bb0cb08a641a4f6
DIFF: 
https://github.com/llvm/llvm-project/commit/9641bd0f87dda34c09c606358bb0cb08a641a4f6.diff

LOG: [TableGen] RuleMatcher::defineComplexSubOperand avoid std::string copy. NFCI.

Use a const reference to avoid a std::string copy; according to the style guide we shouldn't be using auto here anyway.

Fixes an MSVC analyzer warning.

Added: 


Modified: 
llvm/utils/TableGen/GlobalISelEmitter.cpp

Removed: 




diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 8026a3a102be..cd97733ce984 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -933,7 +933,8 @@ class RuleMatcher : public Matcher {
 StringRef ParentSymbolicName) {
 std::string ParentName(ParentSymbolicName);
 if (ComplexSubOperands.count(SymbolicName)) {
-  auto RecordedParentName = ComplexSubOperandsParentName[SymbolicName];
+  const std::string &RecordedParentName =
+  ComplexSubOperandsParentName[SymbolicName];
   if (RecordedParentName != ParentName)
    return failedImport("Error: Complex suboperand " + SymbolicName +
                        " referenced by different operands: " +
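
The underlying pitfall, reduced to a toy example (hypothetical names, not the TableGen emitter code):

```
#include <map>
#include <string>

int main() {
  std::map<std::string, std::string> ParentNames{{"op", "parent"}};

  // `auto` deduces std::string by value, so the looked-up string is copied.
  auto Copied = ParentNames["op"];

  // Spelling the type as a const reference binds without copying.
  const std::string &Referenced = ParentNames["op"];

  (void)Copied;
  (void)Referenced;
}
```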





[llvm-branch-commits] [llvm] f461e35 - [X86][AVX] combineX86ShuffleChain - avoid bitcasts around insert_subvector() shuffle patterns.

2021-01-25 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-25T11:35:45Z
New Revision: f461e35cbafed593e637305e2a76822dfb7ca6c7

URL: 
https://github.com/llvm/llvm-project/commit/f461e35cbafed593e637305e2a76822dfb7ca6c7
DIFF: 
https://github.com/llvm/llvm-project/commit/f461e35cbafed593e637305e2a76822dfb7ca6c7.diff

LOG: [X86][AVX] combineX86ShuffleChain - avoid bitcasts around insert_subvector() shuffle patterns.

We allow insert_subvector lowering of all legal types, so don't always cast to the vXi64/vXf64 shuffle types - this is only necessary for X86ISD::SHUF128/X86ISD::VPERM2X128 patterns later.

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 




diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0edc40683ea8..2a86e12dd53c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35357,8 +35357,6 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
   // Handle 128/256-bit lane shuffles of 512-bit vectors.
   if (RootVT.is512BitVector() &&
   (NumBaseMaskElts == 2 || NumBaseMaskElts == 4)) {
-MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
-
 // If the upper subvectors are zeroable, then an extract+insert is more
 // optimal than using X86ISD::SHUF128. The insertion is free, even if it has
 // to zero the upper subvectors.
@@ -35367,12 +35365,11 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
 return SDValue(); // Nothing to do!
   assert(isInRange(BaseMask[0], 0, NumBaseMaskElts) &&
  "Unexpected lane shuffle");
-  Res = CanonicalizeShuffleInput(ShuffleVT, V1);
-  unsigned SubIdx = BaseMask[0] * (8 / NumBaseMaskElts);
+  Res = CanonicalizeShuffleInput(RootVT, V1);
+  unsigned SubIdx = BaseMask[0] * (NumRootElts / NumBaseMaskElts);
   bool UseZero = isAnyZero(BaseMask);
   Res = extractSubVector(Res, SubIdx, DAG, DL, BaseMaskEltSizeInBits);
-  Res = widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits);
-  return DAG.getBitcast(RootVT, Res);
+  return widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits);
 }
 
 // Narrow shuffle mask to v4x128.
@@ -35423,6 +35420,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
 if (!isAnyZero(Mask) && !PreferPERMQ) {
   if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128)
 return SDValue(); // Nothing to do!
+  MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
   if (SDValue V = MatchSHUF128(ShuffleVT, DL, Mask, V1, V2, DAG))
 return DAG.getBitcast(RootVT, V);
 }
@@ -35430,8 +35428,6 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
 
   // Handle 128-bit lane shuffles of 256-bit vectors.
   if (RootVT.is256BitVector() && NumBaseMaskElts == 2) {
-MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
-
 // If the upper half is zeroable, then an extract+insert is more optimal
 // than using X86ISD::VPERM2X128. The insertion is free, even if it has to
 // zero the upper half.
@@ -35439,13 +35435,13 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
   if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
 return SDValue(); // Nothing to do!
   assert(isInRange(BaseMask[0], 0, 2) && "Unexpected lane shuffle");
-  Res = CanonicalizeShuffleInput(ShuffleVT, V1);
-  Res = extract128BitVector(Res, BaseMask[0] * 2, DAG, DL);
-  Res = widenSubVector(Res, BaseMask[1] == SM_SentinelZero, Subtarget, DAG,
-   DL, 256);
-  return DAG.getBitcast(RootVT, Res);
+  Res = CanonicalizeShuffleInput(RootVT, V1);
+  Res = extract128BitVector(Res, BaseMask[0] * (NumRootElts / 2), DAG, DL);
+  return widenSubVector(Res, BaseMask[1] == SM_SentinelZero, Subtarget, DAG,
+DL, 256);
 }
 
+MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
 if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128)
   return SDValue(); // Nothing to do!
 





[llvm-branch-commits] [llvm] 1b780cf - [X86][AVX] LowerTRUNCATE - avoid bitcasts around extract_subvectors.

2021-01-25 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-25T12:10:36Z
New Revision: 1b780cf32e3eea193aa2255b852a7ef164ea00a5

URL: 
https://github.com/llvm/llvm-project/commit/1b780cf32e3eea193aa2255b852a7ef164ea00a5
DIFF: 
https://github.com/llvm/llvm-project/commit/1b780cf32e3eea193aa2255b852a7ef164ea00a5.diff

LOG: [X86][AVX] LowerTRUNCATE - avoid bitcasts around extract_subvectors.

We allow extract_subvector lowering of all legal types, so pre-bitcast the source type to try and reduce bitcast pollution.

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 




diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2a86e12dd53c..d2a07e7364dd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21075,30 +21075,29 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
   assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!");
 
   if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
+In = DAG.getBitcast(MVT::v8i32, In);
+
 // On AVX2, v4i64 -> v4i32 becomes VPERMD.
 if (Subtarget.hasInt256()) {
   static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
-  In = DAG.getBitcast(MVT::v8i32, In);
   In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask);
   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
  DAG.getIntPtrConstant(0, DL));
 }
 
-SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
DAG.getIntPtrConstant(0, DL));
-SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
-   DAG.getIntPtrConstant(2, DL));
-OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
-OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
+SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+   DAG.getIntPtrConstant(4, DL));
 static const int ShufMask[] = {0, 2, 4, 6};
 return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask);
   }
 
   if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
+In = DAG.getBitcast(MVT::v32i8, In);
+
 // On AVX2, v8i32 -> v8i16 becomes PSHUFB.
 if (Subtarget.hasInt256()) {
-  In = DAG.getBitcast(MVT::v32i8, In);
-
   // The PSHUFB mask:
   static const int ShufMask1[] = { 0,  1,  4,  5,  8,  9, 12, 13,
   -1, -1, -1, -1, -1, -1, -1, -1,
@@ -21107,21 +21106,17 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
   In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1);
   In = DAG.getBitcast(MVT::v4i64, In);
 
-  static const int ShufMask2[] = {0,  2,  -1,  -1};
-  In = DAG.getVectorShuffle(MVT::v4i64, DL,  In, In, ShufMask2);
-  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
-   DAG.getIntPtrConstant(0, DL));
-  return DAG.getBitcast(VT, In);
+  static const int ShufMask2[] = {0, 2, -1, -1};
+  In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2);
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16,
+ DAG.getBitcast(MVT::v16i16, In),
+ DAG.getIntPtrConstant(0, DL));
 }
 
-SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In,
DAG.getIntPtrConstant(0, DL));
-
-SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
-   DAG.getIntPtrConstant(4, DL));
-
-OpLo = DAG.getBitcast(MVT::v16i8, OpLo);
-OpHi = DAG.getBitcast(MVT::v16i8, OpHi);
+SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In,
+   DAG.getIntPtrConstant(16, DL));
 
 // The PSHUFB mask:
 static const int ShufMask1[] = {0,  1,  4,  5,  8,  9, 12, 13,





[llvm-branch-commits] [llvm] 821a51a - [X86][AVX] combineX86ShuffleChainWithExtract - widen to at least original root size. NFCI.

2021-01-25 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-25T13:45:37Z
New Revision: 821a51a9cacfac7da8b34ccc0498d316471f1dbc

URL: 
https://github.com/llvm/llvm-project/commit/821a51a9cacfac7da8b34ccc0498d316471f1dbc
DIFF: 
https://github.com/llvm/llvm-project/commit/821a51a9cacfac7da8b34ccc0498d316471f1dbc.diff

LOG: [X86][AVX] combineX86ShuffleChainWithExtract - widen to at least original root size. NFCI.

We're relying on the source inputs for shuffle combining having already been widened to the root size (otherwise the offset logic falls over). We're going to be supporting different-sized shuffle inputs soon, so we need to explicitly make the minimum widened width the original root size.

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 




diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d2a07e7364dd..ae73a32a5d9a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35997,12 +35997,16 @@ static SDValue combineX86ShuffleChainWithExtract(
   if (NumInputs == 0)
 return SDValue();
 
+  EVT RootVT = Root.getValueType();
+  unsigned RootSizeInBits = RootVT.getSizeInBits();
+  assert((RootSizeInBits % NumMaskElts) == 0 && "Unexpected root shuffle mask");
+
   SmallVector WideInputs(Inputs.begin(), Inputs.end());
   SmallVector Offsets(NumInputs, 0);
 
   // Peek through subvectors.
   // TODO: Support inter-mixed EXTRACT_SUBVECTORs + BITCASTs?
-  unsigned WideSizeInBits = WideInputs[0].getValueSizeInBits();
+  unsigned WideSizeInBits = RootSizeInBits;
   for (unsigned i = 0; i != NumInputs; ++i) {
 SDValue &Src = WideInputs[i];
 unsigned &Offset = Offsets[i];
@@ -36025,8 +36029,6 @@ static SDValue combineX86ShuffleChainWithExtract(
   if (llvm::all_of(Offsets, [](unsigned Offset) { return Offset == 0; }))
 return SDValue();
 
-  EVT RootVT = Root.getValueType();
-  unsigned RootSizeInBits = RootVT.getSizeInBits();
   unsigned Scale = WideSizeInBits / RootSizeInBits;
   assert((WideSizeInBits % RootSizeInBits) == 0 &&
  "Unexpected subvector extraction");





[llvm-branch-commits] [llvm] 13f2aee - [X86][AVX] Generalize vperm2f128/vperm2i128 patterns to support all legal 256-bit vector types

2021-01-25 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-25T15:35:36Z
New Revision: 13f2aee7831c9bec17006a6d401008df541a121d

URL: 
https://github.com/llvm/llvm-project/commit/13f2aee7831c9bec17006a6d401008df541a121d
DIFF: 
https://github.com/llvm/llvm-project/commit/13f2aee7831c9bec17006a6d401008df541a121d.diff

LOG: [X86][AVX] Generalize vperm2f128/vperm2i128 patterns to support all legal 256-bit vector types

Remove bitcasts to/from v4x64 types through vperm2f128/vperm2i128 ops to help improve shuffle combining and demanded vector elts folding.

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86InstrSSE.td
llvm/test/CodeGen/X86/haddsub-2.ll
llvm/test/CodeGen/X86/masked_store_trunc.ll
llvm/test/CodeGen/X86/var-permute-256.ll
llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/test/CodeGen/X86/vector-trunc.ll

Removed: 




diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ae73a32a5d9a..fc19800eda79 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35436,7 +35436,6 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
 DL, 256);
 }
 
-MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
 if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128)
   return SDValue(); // Nothing to do!
 
@@ -35449,12 +35448,9 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
   unsigned PermMask = 0;
   PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
   PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
-
-  Res = CanonicalizeShuffleInput(ShuffleVT, V1);
-  Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
-DAG.getUNDEF(ShuffleVT),
-DAG.getTargetConstant(PermMask, DL, MVT::i8));
-  return DAG.getBitcast(RootVT, Res);
+  return DAG.getNode(
+  X86ISD::VPERM2X128, DL, RootVT, CanonicalizeShuffleInput(RootVT, V1),
+  DAG.getUNDEF(RootVT), DAG.getTargetConstant(PermMask, DL, MVT::i8));
 }
 
 if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128)
@@ -35470,14 +35466,12 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
 unsigned PermMask = 0;
 PermMask |= ((BaseMask[0] & 3) << 0);
 PermMask |= ((BaseMask[1] & 3) << 4);
-
 SDValue LHS = isInRange(BaseMask[0], 0, 2) ? V1 : V2;
 SDValue RHS = isInRange(BaseMask[1], 0, 2) ? V1 : V2;
-Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT,
-  CanonicalizeShuffleInput(ShuffleVT, LHS),
-  CanonicalizeShuffleInput(ShuffleVT, RHS),
+return DAG.getNode(X86ISD::VPERM2X128, DL, RootVT,
+  CanonicalizeShuffleInput(RootVT, LHS),
+  CanonicalizeShuffleInput(RootVT, RHS),
   DAG.getTargetConstant(PermMask, DL, MVT::i8));
-return DAG.getBitcast(RootVT, Res);
   }
 }
   }
@@ -37323,11 +37317,26 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
 return SDValue();
   }
   case X86ISD::VPERM2X128: {
+// Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)).
+SDValue LHS = N->getOperand(0);
+SDValue RHS = N->getOperand(1);
+if (LHS.getOpcode() == ISD::BITCAST &&
+(RHS.getOpcode() == ISD::BITCAST || RHS.isUndef())) {
+  EVT SrcVT = LHS.getOperand(0).getValueType();
+  if (RHS.isUndef() || SrcVT == RHS.getOperand(0).getValueType()) {
+return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT,
+  DAG.getBitcast(SrcVT, LHS),
+  DAG.getBitcast(SrcVT, RHS),
+  N->getOperand(2)));
+  }
+}
+
+// Fold vperm2x128(op(),op()) -> op(vperm2x128(),vperm2x128()).
 if (SDValue Res = canonicalizeLaneShuffleWithRepeatedOps(N, DAG, DL))
-return Res;
+  return Res;
 
-// Combine vperm2x128 subvector shuffle with an inner concat pattern.
-// vperm2x128(concat(X,Y),concat(Z,W)) --> concat X,Y etc.
+// Fold vperm2x128 subvector shuffle with an inner concat pattern.
+// vperm2x128(concat(X,Y),concat(Z,W)) --> concat X,Y etc.  
 auto FindSubVector128 = [&](unsigned Idx) {
   if (Idx > 3)
 return SDValue();

diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 071c638077b2..7cf555748c46 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -7287,16 +7287,12 

[llvm-branch-commits] [llvm] 827d0c5 - [X86] combineToExtendBoolVectorInReg - use explicit arguments. NFC.

2022-02-12 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2022-02-11T16:40:29Z
New Revision: 827d0c51be93c4b0bcbe43a6cbbcc0e65a8b9f58

URL: 
https://github.com/llvm/llvm-project/commit/827d0c51be93c4b0bcbe43a6cbbcc0e65a8b9f58
DIFF: 
https://github.com/llvm/llvm-project/commit/827d0c51be93c4b0bcbe43a6cbbcc0e65a8b9f58.diff

LOG: [X86] combineToExtendBoolVectorInReg - use explicit arguments. NFC.

Replace the *_EXTEND node with the raw operands; this will make it easier to use combineToExtendBoolVectorInReg for any boolvec extension combine.

Cleanup prep for Issue #53760

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 




diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 53c00affd70e6..84c7ff58ae9b0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -50422,11 +50422,9 @@ static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG) {
 
 // Convert (vXiY *ext(vXi1 bitcast(iX))) to extend_in_reg(broadcast(iX)).
 // This is more or less the reverse of combineBitcastvxi1.
-static SDValue
-combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
-   TargetLowering::DAGCombinerInfo &DCI,
-   const X86Subtarget &Subtarget) {
-  unsigned Opcode = N->getOpcode();
+static SDValue combineToExtendBoolVectorInReg(
+unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N0, SelectionDAG &DAG,
+TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) {
   if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND &&
   Opcode != ISD::ANY_EXTEND)
 return SDValue();
@@ -50435,8 +50433,6 @@ combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
   if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
 return SDValue();
 
-  SDValue N0 = N->getOperand(0);
-  EVT VT = N->getValueType(0);
   EVT SVT = VT.getScalarType();
   EVT InSVT = N0.getValueType().getScalarType();
   unsigned EltSizeInBits = SVT.getSizeInBits();
@@ -50451,13 +50447,12 @@ combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
 return SDValue();
 
   SDValue N00 = N0.getOperand(0);
-  EVT SclVT = N0.getOperand(0).getValueType();
+  EVT SclVT = N00.getValueType();
   if (!SclVT.isScalarInteger())
 return SDValue();
 
-  SDLoc DL(N);
   SDValue Vec;
-  SmallVector ShuffleMask;
+  SmallVector ShuffleMask;
   unsigned NumElts = VT.getVectorNumElements();
   assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size");
 
@@ -50603,7 +50598,8 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
 return V;
 
-  if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
+  if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), DL, VT, N0,
+ DAG, DCI, Subtarget))
 return V;
 
   if (VT.isVector()) {
@@ -50757,7 +50753,8 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
 if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
   return V;
 
-  if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
+  if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), dl, VT, N0,
+ DAG, DCI, Subtarget))
 return V;
 
   if (VT.isVector())





[llvm-branch-commits] [llvm] 48e1434 - [X86] Move combineToExtendBoolVectorInReg before the select combines. NFC.

2022-02-12 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2022-02-11T16:51:46Z
New Revision: 48e1434a0a77852f58c1617123f228f1069ba775

URL: 
https://github.com/llvm/llvm-project/commit/48e1434a0a77852f58c1617123f228f1069ba775
DIFF: 
https://github.com/llvm/llvm-project/commit/48e1434a0a77852f58c1617123f228f1069ba775.diff

LOG: [X86] Move combineToExtendBoolVectorInReg before the select combines. NFC.

Avoid the need for a forward declaration.

Cleanup prep for Issue #53760

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 




diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 84c7ff58ae9b0..e91f68425522f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43123,6 +43123,104 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Convert (vXiY *ext(vXi1 bitcast(iX))) to extend_in_reg(broadcast(iX)).
+// This is more or less the reverse of combineBitcastvxi1.
+static SDValue combineToExtendBoolVectorInReg(
+unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N0, SelectionDAG &DAG,
+TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) {
+  if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND &&
+  Opcode != ISD::ANY_EXTEND)
+return SDValue();
+  if (!DCI.isBeforeLegalizeOps())
+return SDValue();
+  if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
+return SDValue();
+
+  EVT SVT = VT.getScalarType();
+  EVT InSVT = N0.getValueType().getScalarType();
+  unsigned EltSizeInBits = SVT.getSizeInBits();
+
+  // Input type must be extending a bool vector (bit-casted from a scalar
+  // integer) to legal integer types.
+  if (!VT.isVector())
+return SDValue();
+  if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8)
+return SDValue();
+  if (InSVT != MVT::i1 || N0.getOpcode() != ISD::BITCAST)
+return SDValue();
+
+  SDValue N00 = N0.getOperand(0);
+  EVT SclVT = N00.getValueType();
+  if (!SclVT.isScalarInteger())
+return SDValue();
+
+  SDValue Vec;
+  SmallVector ShuffleMask;
+  unsigned NumElts = VT.getVectorNumElements();
+  assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size");
+
+  // Broadcast the scalar integer to the vector elements.
+  if (NumElts > EltSizeInBits) {
+// If the scalar integer is greater than the vector element size, then we
+// must split it down into sub-sections for broadcasting. For example:
+//   i16 -> v16i8 (i16 -> v8i16 -> v16i8) with 2 sub-sections.
+//   i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections.
+assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale");
+unsigned Scale = NumElts / EltSizeInBits;
+EVT BroadcastVT = EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits);
+Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
+Vec = DAG.getBitcast(VT, Vec);
+
+for (unsigned i = 0; i != Scale; ++i)
+  ShuffleMask.append(EltSizeInBits, i);
+Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
+  } else if (Subtarget.hasAVX2() && NumElts < EltSizeInBits &&
+ (SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) {
+// If we have register broadcast instructions, use the scalar size as the
+// element type for the shuffle. Then cast to the wider element type. The
+// widened bits won't be used, and this might allow the use of a broadcast
+// load.
+assert((EltSizeInBits % NumElts) == 0 && "Unexpected integer scale");
+unsigned Scale = EltSizeInBits / NumElts;
+EVT BroadcastVT =
+EVT::getVectorVT(*DAG.getContext(), SclVT, NumElts * Scale);
+Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
+ShuffleMask.append(NumElts * Scale, 0);
+Vec = DAG.getVectorShuffle(BroadcastVT, DL, Vec, Vec, ShuffleMask);
+Vec = DAG.getBitcast(VT, Vec);
+  } else {
+// For smaller scalar integers, we can simply any-extend it to the vector
+// element size (we don't care about the upper bits) and broadcast it to all
+// elements.
+SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT);
+Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);
+ShuffleMask.append(NumElts, 0);
+Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
+  }
+
+  // Now, mask the relevant bit in each element.
+  SmallVector Bits;
+  for (unsigned i = 0; i != NumElts; ++i) {
+int BitIdx = (i % EltSizeInBits);
+APInt Bit = APInt::getBitsSet(EltSizeInBits, BitIdx, BitIdx + 1);
+Bits.push_back(DAG.getConstant(Bit, DL, SVT));
+  }
+  SDValue BitMask = DAG.getBuildVector(VT, DL, Bits);
+  Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask);
+
+  // Compare against the bitmask and extend the result.
+  EVT CCVT = VT.changeVectorElementType(MVT::i1);
+  Vec = DAG.getSetCC(DL, CCVT, Vec, BitMask, ISD::
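
As a plain-integer model of the broadcast-and-mask trick implemented above (toy code, not LLVM itself): every lane receives the whole scalar, is ANDed with its own bit, and is compared against that bit to produce an all-ones or all-zeros lane.

```
#include <cstdint>
#include <cstdio>

int main() {
  uint8_t Bits = 0xB2;  // the vXi1 source, bitcast from a scalar i8
  int16_t Lanes[8];     // models a v8i16 sign-extension result
  for (int I = 0; I != 8; ++I) {
    uint16_t Broadcast = Bits;           // every lane holds the full scalar
    uint16_t BitMask = uint16_t(1) << I; // each lane tests a different bit
    // setcc + sign extend: all-ones if the bit is set, all-zeros otherwise.
    Lanes[I] = ((Broadcast & BitMask) == BitMask) ? int16_t(-1) : int16_t(0);
  }
  for (int I = 0; I != 8; ++I)
    std::printf("lane %d = %d\n", I, Lanes[I]);
  return 0;
}
```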

[llvm-branch-commits] [llvm-branch] r282753 - [3.9.1] Merging r280837 [X86] Don't reduce the width of vector mul if the target doesn't support SSE2.

2016-09-29 Thread Simon Pilgrim via llvm-branch-commits
Author: rksimon
Date: Thu Sep 29 14:16:52 2016
New Revision: 282753

URL: http://llvm.org/viewvc/llvm-project?rev=282753&view=rev
Log:
[3.9.1] Merging r280837 [X86] Don't reduce the width of vector mul if the target doesn't support SSE2.

The patch fixes PR30298, which was caused by rL272694. The solution is to bail out if the target has no SSE2.

Differential Revision: https://reviews.llvm.org/D24288

Added:
llvm/branches/release_39/test/CodeGen/X86/pr30298.ll
  - copied unchanged from r280837, llvm/trunk/test/CodeGen/X86/pr30298.ll
Modified:
llvm/branches/release_39/   (props changed)
llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp

Propchange: llvm/branches/release_39/
--
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Sep 29 14:16:52 2016
@@ -1,3 +1,3 @@
 /llvm/branches/Apple/Pertwee:110850,110961
 /llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276051,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276648,276676,276712,276740,276823,276956,276980,277093,277114,277135,277371,277399,277500,277504,277625,277691,277693,23,278002,278086,278133,278157,278343,278370,278413,278558-278559,278562,278569,278571,278573,278575,278584,278841,278900,278938,278999,279125,279268,279369,279647,281957
+/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276051,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276648,276676,276712,276740,276823,276956,276980,277093,277114,277135,277371,277399,277500,277504,277625,277691,277693,23,278002,278086,278133,278157,278343,278370,278413,278558-278559,278562,278569,278571,278573,278575,278584,278841,278900,278938,278999,279125,279268,279369,279647,280837,281957

Modified: llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp
URL: 
http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp?rev=282753&r1=282752&r2=282753&view=diff
==
--- llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp Thu Sep 29 
14:16:52 2016
@@ -27516,7 +27516,8 @@ static SDValue reduceVMULWidth(SDNode *N
const X86Subtarget &Subtarget) {
   // pmulld is supported since SSE41. It is better to use pmulld
   // instead of pmullw+pmulhw.
-  if (Subtarget.hasSSE41())
+  // pmullw/pmulhw are not supported by SSE.
+  if (Subtarget.hasSSE41() || !Subtarget.hasSSE2())
 return SDValue();
 
   ShrinkMode Mode;




[llvm-branch-commits] [llvm-branch] r286248 - [3.9.1] Merging r282613 - [X86][AVX] Add test showing that VBROADCAST loads don't correctly respect dependencies

2016-11-08 Thread Simon Pilgrim via llvm-branch-commits
Author: rksimon
Date: Tue Nov  8 10:45:26 2016
New Revision: 286248

URL: http://llvm.org/viewvc/llvm-project?rev=286248&view=rev
Log:
[3.9.1] Merging r282613 - [X86][AVX] Add test showing that VBROADCAST loads don't correctly respect dependencies

As discussed in PR30596, this is a preliminary test update before we can merge r283070.

Note: this required the test to be regenerated after the merge, as 3.9.1 doesn't have trunk's latest lea -> mov simplifications.

Modified:
llvm/branches/release_39/   (props changed)
llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll

Propchange: llvm/branches/release_39/
--
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Nov  8 10:45:26 2016
@@ -1,3 +1,3 @@
 /llvm/branches/Apple/Pertwee:110850,110961
 /llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276051,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276648,276676,276712,276740,276823,276956,276980,277093,277114,277135,277371,277399,277500,277504,277625,277691,277693,23,278002,278086,278133,278157,278343,278370,278413,278558-278559,278562,278569,278571,278573,278575,278584,278841,278900,278938,278999,279125,279268,279369,279647,280837,281957,283129
+/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276051,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276648,276676,276712,276740,276823,276956,276980,277093,277114,277135,277371,277399,277500,277504,277625,277691,277693,23,278002,278086,278133,278157,278343,278370,278413,278558-278559,278562,278569,278571,278573,278575,278584,278841,278900,278938,278999,279125,279268,279369,279647,280837,281957,282613,283129

Modified: llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll
URL: 
http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll?rev=286248&r1=286247&r2=286248&view=diff
==
--- llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll (original)
+++ llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll Tue Nov  8 
10:45:26 2016
@@ -546,3 +546,62 @@ define <4 x double> @splat_concat4(doubl
   %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> 
   ret <4 x double> %6
 }
+
+;
+; FIXME: When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies.
+;
+define float @broadcast_lifetime() nounwind {
+; X32-LABEL: broadcast_lifetime:
+; X32:   ## BB#0:
+; X32-NEXT:pushl %esi
+; X32-NEXT:subl $40, %esp
+; X32-NEXT:leal {{[0-9]+}}(%esp), %esi
+; X32-NEXT:movl %esi, (%esp)
+; X32-NEXT:calll _gfunc
+; X32-NEXT:movl %esi, (%esp)
+; X32-NEXT:calll _gfunc
+; X32-NEXT:vbroadcastss {{[0-9]+}}(%esp), %xmm0
+; X32-NEXT:vbroadcastss {{[0-9]+}}(%esp), %xmm1
+; X32-NEXT:vsubss %xmm0, %xmm1, %xmm0
+; X32-NEXT:vmovss %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT:flds {{[0-9]+}}(%esp)
+; X32-NEXT:addl $40, %esp
+; X32-NEXT:popl %esi
+; X32-NEXT:retl
+;
+; X64-LABEL: broadcast_lifetime:
+; X64:   ## BB#0:
+; X64-NEXT:subq $24, %rsp
+; X64-NEXT:leaq (%rsp), %rdi
+; X64-NEXT:callq _gfunc
+; X64-NEXT:leaq (%rsp), %rdi
+; X64-NEXT:callq _gfunc
+; X64-NEXT:vbroadcastss {{[0-9]+}}(%rsp), %xmm0
+; X64-NEXT:vbroadcastss {{[0-9]+}}(%rsp), %xmm1
+; X64-NEXT:vsubss %xmm0, %xmm1, %xmm0
+; X64-NEXT:addq $24, %rsp
+; X64-NEXT:retq
+  %1 = alloca <4 x float>, align 16
+  %2 = alloca <4 x float>, align 16
+  %3 = bitcast <4 x float>* %1 to i8*
+  %4 = bitcast <4 x float>* %2 to i8*
+
+  call void @llvm.lifetime.start(i64 16, i8* %3)
+  call void @gfunc(<4 x float>* %1)
+  %5 = load <4 x float>, <4 x float>* %1, align 16
+  call void @llvm.lifetime.end(i64 16, i8* %3)
+
+  call void @llvm.lifetime.start(i64 16, i8* %4)
+  call void @gfunc(<4 x float>* %2)
+  %6 = load <4 x float>, <4 x float>* %2, align 16
+  call void @llvm.lifetime.end(i64 16, i8* %4)
+
+  %7 = extractelement <4 x float> %5, i32 1
+  %8 = extractelement <4 x float> %6, i32 1
+  %9 = fsub float %8, %7
+  ret float %9
+}
+
+declare void @gfunc(<4 x float>*)
+declare void @llvm.lifetime.start(i64, i8*)
+declare void @llvm.lifetime.end(i64, i8*)




[llvm-branch-commits] [llvm-branch] r286251 - [3.9.1] Merging r283070 - [X86][AVX] Ensure broadcast loads respect dependencies

2016-11-08 Thread Simon Pilgrim via llvm-branch-commits
Author: rksimon
Date: Tue Nov  8 11:01:05 2016
New Revision: 286251

URL: http://llvm.org/viewvc/llvm-project?rev=286251&view=rev
Log:
[3.9.1] Merging r283070 - [X86][AVX] Ensure broadcast loads respect dependencies

To allow broadcast loads of a non-zero'th vector element, lowerVectorShuffleAsBroadcast can replace a load with a new load with an adjusted address, but unfortunately we weren't ensuring that the new load respected the same dependencies.

This patch adds a TokenFactor and updates all dependencies of the old load to reference the new load instead.

Bug found during internal testing.

Differential Revision: https://reviews.llvm.org/D25039

As discussed on PR30596
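
For readers unfamiliar with SelectionDAG chains, here is the repair pattern
from the patch, annotated in isolation (editor's annotation; Ld is the
original load, V the re-addressed load created just above it):

  // Only needed if anything actually consumed Ld's output chain.
  if (Ld->hasAnyUseOfValue(1)) {
    // Merge the old and new load chains so every consumer waits on both.
    SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                   SDValue(Ld, 1), SDValue(V.getNode(), 1));
    // Redirect all users of Ld's chain to the TokenFactor...
    DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
    // ...which also rewrites the TokenFactor's own operand, so restore its
    // two real operands (the two load chains) explicitly afterwards.
    DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
                           SDValue(V.getNode(), 1));
  }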

Modified:
llvm/branches/release_39/   (props changed)
llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp
llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll

Propchange: llvm/branches/release_39/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Nov  8 11:01:05 2016
@@ -1,3 +1,3 @@
 /llvm/branches/Apple/Pertwee:110850,110961
 /llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276051,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276648,276676,276712,276740,276823,276956,276980,277093,277114,277135,277371,277399,277500,277504,277625,277691,277693,23,278002,278086,278133,278157,278343,278370,278413,278558-278559,278562,278569,278571,278573,278575,278584,278841,278900,278938,278999,279125,279268,279369,279647,280837,281957,282613,283129
+/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276051,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276648,276676,276712,276740,276823,276956,276980,277093,277114,277135,277371,277399,277500,277504,277625,277691,277693,23,278002,278086,278133,278157,278343,278370,278413,278558-278559,278562,278569,278571,278573,278575,278584,278841,278900,278938,278999,279125,279268,279369,279647,280837,281957,282613,283070,283129

Modified: llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp
URL: 
http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp?rev=286251&r1=286250&r2=286251&view=diff
==============================================================================
--- llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp Tue Nov  8 
11:01:05 2016
@@ -8656,6 +8656,17 @@ static SDValue lowerVectorShuffleAsBroad
 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
 DAG.getMachineFunction().getMachineMemOperand(
 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
+
+// Make sure the newly-created LOAD is in the same position as Ld in
+// terms of dependency. We create a TokenFactor for Ld and V,
+// and update uses of Ld's output chain to use the TokenFactor.
+if (Ld->hasAnyUseOfValue(1)) {
+  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ SDValue(Ld, 1), SDValue(V.getNode(), 1));
+  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
+  DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
+ SDValue(V.getNode(), 1));
+}
   } else if (!BroadcastFromReg) {
 // We can't broadcast from a vector register.
 return SDValue();

Modified: llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll
URL: 
http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll?rev=286251&r1=286250&r2=286251&view=diff
==============================================================================
--- llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll (original)
+++ llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll Tue Nov  8 
11:01:05 2016
@@ -548,38 +548,40 @@ define <4 x double> @splat_concat4(doubl
 }
 
 ;
-; FIXME: When VBROADCAST replaces an existing load, ensure it still respects 
lifetime dependencies.
+; When VBROADCAST replaces an existing load, ensure it still respects lifetime 
dependencies.
 ;
 define float @broadcast_lifetime() nounwind {
 ; X32-LABEL: broadcast_lifetime:
 ; X32:   ## BB#0:
 ; X32-NEXT:pushl %esi
-; X32-NEXT:subl $40, %esp
+; X32-NEXT:subl $56, %esp
 ; X32-NEXT:leal {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:movl %esi, (%esp)
 ; X32-NEXT:calll _gfunc
+; X32-NEXT:vbroadcastss {{[0-9]+}}(%esp), %xmm0
+; X32-NEXT:vmovaps %xmm0, {{[0-9]+}}(%esp) ## 16-byte Spill
 ; X32-NEXT:movl %esi, (%esp)
 ; X32-NEXT:calll _gfunc
 ; X32-NEXT:vbroadcastss {{[0-9]+}}(%esp), %xmm0
-; X32-NEXT:vbroadcastss {{[0-9]+}}(%esp), %xmm1
-; X32-NEXT:vsubss %xmm0, %xmm1, %xmm0
+;

[llvm-branch-commits] [llvm] 85aaa3e - [X86] Regenerate sdiv_fix_sat.ll + udiv_fix_sat.ll tests

2021-01-12 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-12T17:25:30Z
New Revision: 85aaa3e310c23ec8a375b7a2e2fceee5a72441ef

URL: 
https://github.com/llvm/llvm-project/commit/85aaa3e310c23ec8a375b7a2e2fceee5a72441ef
DIFF: 
https://github.com/llvm/llvm-project/commit/85aaa3e310c23ec8a375b7a2e2fceee5a72441ef.diff

LOG: [X86] Regenerate sdiv_fix_sat.ll + udiv_fix_sat.ll tests

Adding missing libcall PLT qualifiers: calls to externally-defined libcalls
such as __divti3 go through the procedure linkage table, so the regenerated
checks now expect e.g. callq __divti3@PLT rather than a direct call.

Added: 


Modified: 
llvm/test/CodeGen/X86/sdiv_fix_sat.ll
llvm/test/CodeGen/X86/udiv_fix_sat.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll 
b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 512488e8f872..617d5d7876bd 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -322,7 +322,7 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
 ; X64-NEXT:movq %r15, %rdi
 ; X64-NEXT:movq %r12, %rsi
 ; X64-NEXT:movq %r13, %rcx
-; X64-NEXT:callq __divti3
+; X64-NEXT:callq __divti3@PLT
 ; X64-NEXT:movq %rax, %rbx
 ; X64-NEXT:movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:movq %rdx, %rbp
@@ -338,7 +338,7 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
 ; X64-NEXT:movq %r12, %rsi
 ; X64-NEXT:movq (%rsp), %rdx # 8-byte Reload
 ; X64-NEXT:movq %r13, %rcx
-; X64-NEXT:callq __modti3
+; X64-NEXT:callq __modti3@PLT
 ; X64-NEXT:orq %rax, %rdx
 ; X64-NEXT:setne %al
 ; X64-NEXT:testb %r14b, %al
@@ -613,7 +613,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:movq %r12, %rdi
 ; X64-NEXT:movq %rbp, %rsi
 ; X64-NEXT:movq %r15, %rcx
-; X64-NEXT:callq __divti3
+; X64-NEXT:callq __divti3@PLT
 ; X64-NEXT:movq %rax, %r13
 ; X64-NEXT:movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:movq %rdx, %r14
@@ -626,7 +626,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:movq %rbp, %rsi
 ; X64-NEXT:movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
 ; X64-NEXT:movq %r15, %rcx
-; X64-NEXT:callq __modti3
+; X64-NEXT:callq __modti3@PLT
 ; X64-NEXT:orq %rax, %rdx
 ; X64-NEXT:setne %al
 ; X64-NEXT:testb %bl, %al
@@ -668,7 +668,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:movq %r15, %rdi
 ; X64-NEXT:movq %r13, %rsi
 ; X64-NEXT:movq %rbp, %rcx
-; X64-NEXT:callq __divti3
+; X64-NEXT:callq __divti3@PLT
 ; X64-NEXT:movq %rax, %r12
 ; X64-NEXT:movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:movq %rdx, %r14
@@ -681,7 +681,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:movq %r13, %rsi
 ; X64-NEXT:movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
 ; X64-NEXT:movq %rbp, %rcx
-; X64-NEXT:callq __modti3
+; X64-NEXT:callq __modti3@PLT
 ; X64-NEXT:orq %rax, %rdx
 ; X64-NEXT:setne %al
 ; X64-NEXT:testb %bl, %al
@@ -735,7 +735,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:movq %r15, %rdi
 ; X64-NEXT:movq %r12, %rsi
 ; X64-NEXT:movq %rbp, %rcx
-; X64-NEXT:callq __divti3
+; X64-NEXT:callq __divti3@PLT
 ; X64-NEXT:movq %rax, %r13
 ; X64-NEXT:movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:movq %rdx, %r14
@@ -748,7 +748,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:movq %r12, %rsi
 ; X64-NEXT:movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
 ; X64-NEXT:movq %rbp, %rcx
-; X64-NEXT:callq __modti3
+; X64-NEXT:callq __modti3@PLT
 ; X64-NEXT:orq %rax, %rdx
 ; X64-NEXT:setne %al
 ; X64-NEXT:testb %bl, %al
@@ -790,7 +790,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:movq %r15, %rdi
 ; X64-NEXT:movq %r13, %rsi
 ; X64-NEXT:movq %rbp, %rcx
-; X64-NEXT:callq __divti3
+; X64-NEXT:callq __divti3@PLT
 ; X64-NEXT:movq %rax, %r12
 ; X64-NEXT:movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:movq %rdx, %r14
@@ -803,7 +803,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:movq %r13, %rsi
 ; X64-NEXT:movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
 ; X64-NEXT:movq %rbp, %rcx
-; X64-NEXT:callq __modti3
+; X64-NEXT:callq __modti3@PLT
 ; X64-NEXT:orq %rax, %rdx
 ; X64-NEXT:setne %al
 ; X64-NEXT:testb %bl, %al

diff  --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll 
b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
index d2e3b80c2145..2be51c3ccbba 100644
--- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -179,7 +179,7 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
 ; X64-NEXT:shlq $32, %rdi
 ; X64-NEXT:xorl %ebx, %ebx
 ; X64-NEXT:xorl %ecx, %ecx
-; X64-NEXT:callq __udivti3
+; X64-NEXT:callq __udivti3@PLT
 ; X64-NEXT:cmpq $-1, %rax
 ; X64-NEXT:movq $-1, %rcx
 ; X

[llvm-branch-commits] [llvm] a4931d4 - [AMDGPU] Regenerate umax crash test

2021-01-12 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-12T18:02:15Z
New Revision: a4931d4fe38d6feef53f97f3dcc7792bfcb06c84

URL: 
https://github.com/llvm/llvm-project/commit/a4931d4fe38d6feef53f97f3dcc7792bfcb06c84
DIFF: 
https://github.com/llvm/llvm-project/commit/a4931d4fe38d6feef53f97f3dcc7792bfcb06c84.diff

LOG: [AMDGPU] Regenerate umax crash test

Added: 


Modified: 
llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll

Removed: 




diff  --git a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll 
b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
index b7ed34bbf09b..b4cd36daad65 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
@@ -1,8 +1,27 @@
-; RUN: llc -march=r600 -mcpu=cypress -start-after safe-stack %s -o - | 
FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=r600 -mcpu=cypress -start-after safe-stack | FileCheck 
%s
 ; Don't crash
 
-; CHECK: MAX_UINT
 define amdgpu_kernel void @test(i64 addrspace(1)* %out) {
+; CHECK-LABEL: test:
+; CHECK:   ; %bb.0: ; %bb
+; CHECK-NEXT:ALU 4, @6, KC0[CB0:0-32], KC1[]
+; CHECK-NEXT:MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 0
+; CHECK-NEXT:ALU 3, @11, KC0[], KC1[]
+; CHECK-NEXT:MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; CHECK-NEXT:CF_END
+; CHECK-NEXT:PAD
+; CHECK-NEXT:ALU clause starting at 6:
+; CHECK-NEXT: MOV T0.X, literal.x,
+; CHECK-NEXT: MOV T0.Y, 0.0,
+; CHECK-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CHECK-NEXT:2(2.802597e-45), 0(0.00e+00)
+; CHECK-NEXT: MOV * T0.W, KC0[2].Y,
+; CHECK-NEXT:ALU clause starting at 11:
+; CHECK-NEXT: MAX_UINT T0.X, T0.X, literal.x,
+; CHECK-NEXT: MOV T0.Y, 0.0,
+; CHECK-NEXT: LSHR * T1.X, T0.W, literal.y,
+; CHECK-NEXT:4(5.605194e-45), 2(2.802597e-45)
 bb:
   store i64 2, i64 addrspace(1)* %out
   %tmp = load i64, i64 addrspace(1)* %out



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 0f59d09 - [X86][AVX] combineVectorSignBitsTruncation - limit AVX512 truncations to 128-bits (PR48727)

2021-01-13 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-13T10:38:23Z
New Revision: 0f59d099571d3d803b54e2ce06aa94babb9b26db

URL: 
https://github.com/llvm/llvm-project/commit/0f59d099571d3d803b54e2ce06aa94babb9b26db
DIFF: 
https://github.com/llvm/llvm-project/commit/0f59d099571d3d803b54e2ce06aa94babb9b26db.diff

LOG: [X86][AVX] combineVectorSignBitsTruncation - limit AVX512 truncations to 
128-bits (PR48727)

rG73a44f437bf1 resulted in 256-bit packss/packus ops with additional shuffles
that shuffle combining can sometimes try to convert back into a truncation.
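
For context, the shape of the tightened guard (editor's reconstruction; the
SmallVector template arguments are an assumption, as the archive dropped
them):

  // On AVX512 targets, keep the PACKSS/PACKUS lowering only for 128-bit
  // results whose input was originally concatenated from subvectors;
  // anything wider is left to the VPMOV* truncation patterns instead.
  if (Subtarget.hasAVX512() &&
      !(!Subtarget.useAVX512Regs() && VT.is256BitVector() &&
        InVT.is512BitVector())) {
    SmallVector<SDValue, 4> ConcatOps; // template args assumed
    if (VT.getSizeInBits() > 128 || !collectConcatOps(In.getNode(), ConcatOps))
      return SDValue();
  }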

Added: 
llvm/test/CodeGen/X86/pr48727.ll

Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-pack-256.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 65b784f31842..5949782f3c0c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45957,11 +45957,11 @@ static SDValue combineVectorSignBitsTruncation(SDNode 
*N, const SDLoc &DL,
   if (Subtarget.hasAVX512() &&
   !(!Subtarget.useAVX512Regs() && VT.is256BitVector() &&
 InVT.is512BitVector())) {
-// PACK should still be worth it for 128/256-bit vectors if the sources 
were
+// PACK should still be worth it for 128-bit vectors if the sources were
 // originally concatenated from subvectors.
 SmallVector ConcatOps;
-if (VT.getSizeInBits() > 256 || !collectConcatOps(In.getNode(), ConcatOps))
-  return SDValue();
+if (VT.getSizeInBits() > 128 || !collectConcatOps(In.getNode(), ConcatOps))
+  return SDValue();
   }
 
   unsigned NumPackedSignBits = std::min(SVT.getSizeInBits(), 16);

diff  --git a/llvm/test/CodeGen/X86/pr48727.ll 
b/llvm/test/CodeGen/X86/pr48727.ll
new file mode 100644
index ..4fa16db14acc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr48727.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=skx | FileCheck %s
+
+define void @PR48727() {
+; CHECK-LABEL: PR48727:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vcvttpd2dqy 0, %xmm0
+; CHECK-NEXT:vcvttpd2dqy 128, %xmm1
+; CHECK-NEXT:movq (%rax), %rax
+; CHECK-NEXT:vcvttpd2dqy 160, %xmm2
+; CHECK-NEXT:vinserti128 $1, %xmm2, %ymm1, %ymm1
+; CHECK-NEXT:vcvttpd2dqy (%rax), %xmm2
+; CHECK-NEXT:vinserti128 $1, %xmm2, %ymm0, %ymm0
+; CHECK-NEXT:vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT:vpmovdw %zmm0, %ymm0
+; CHECK-NEXT:vmovdqu %ymm0, 16(%rax)
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq
+entry:
+  %0 = load [100 x [100 x i16]]*, [100 x [100 x i16]]** undef, align 8
+  %wide.load.2 = load <4 x double>, <4 x double>* null, align 16
+  %1 = fptosi <4 x double> %wide.load.2 to <4 x i16>
+  %2 = getelementptr inbounds [100 x [100 x i16]], [100 x [100 x i16]]* %0, 
i64 0, i64 0, i64 8
+  %3 = bitcast i16* %2 to <4 x i16>*
+  store <4 x i16> %1, <4 x i16>* %3, align 8
+  %wide.load.3 = load <4 x double>, <4 x double>* undef, align 16, 
!invariant.load !0, !noalias !1
+  %4 = fptosi <4 x double> %wide.load.3 to <4 x i16>
+  %5 = getelementptr inbounds [100 x [100 x i16]], [100 x [100 x i16]]* %0, 
i64 0, i64 0, i64 12
+  %6 = bitcast i16* %5 to <4 x i16>*
+  store <4 x i16> %4, <4 x i16>* %6, align 8
+  %7 = getelementptr inbounds [100 x [100 x double]], [100 x [100 x double]]* 
null, i64 0, i64 0, i64 16
+  %8 = bitcast double* %7 to <4 x double>*
+  %wide.load.4 = load <4 x double>, <4 x double>* %8, align 16, 
!invariant.load !0, !noalias !1
+  %9 = fptosi <4 x double> %wide.load.4 to <4 x i16>
+  %10 = getelementptr inbounds [100 x [100 x i16]], [100 x [100 x i16]]* %0, 
i64 0, i64 0, i64 16
+  %11 = bitcast i16* %10 to <4 x i16>*
+  store <4 x i16> %9, <4 x i16>* %11, align 8
+  %12 = getelementptr inbounds [100 x [100 x double]], [100 x [100 x double]]* 
null, i64 0, i64 0, i64 20
+  %13 = bitcast double* %12 to <4 x double>*
+  %wide.load.5 = load <4 x double>, <4 x double>* %13, align 16, 
!invariant.load !0, !noalias !1
+  %14 = fptosi <4 x double> %wide.load.5 to <4 x i16>
+  %15 = getelementptr inbounds [100 x [100 x i16]], [100 x [100 x i16]]* %0, 
i64 0, i64 0, i64 20
+  %16 = bitcast i16* %15 to <4 x i16>*
+  store <4 x i16> %14, <4 x i16>* %16, align 8
+  ret void
+}
+
+!0 = !{}
+!1 = !{!2}
+!2 = !{!"buffer: {index:1, offset:0, size:2}", !3}
+!3 = !{!"XLA global AA domain"}

diff  --git a/llvm/test/CodeGen/X86/vector-pack-256.ll 
b/llvm/test/CodeGen/X86/vector-pack-256.ll
index af06ddbd3f3a..b789b46906cb 100644
--- a/llvm/test/CodeGen/X86/vector-pack-256.ll
+++ b/llvm/test/CodeGen/X86/vector-pack-256.ll
@@ -31,7 +31,10 @@ define <16 x i16> @trunc_concat_packssdw_256(<8 x i32> %a0, 
<8 x i32> %a1) nounw
 ; AVX512:   # %bb.0:
 ; AVX512-NEXT:vpsrad $17, %ymm0, %ymm0
 ; AVX512-NEXT:vpsrad $23, %ymm1, %ymm1
-; AVX512-

[llvm-branch-commits] [llvm] 0a0ee7f - [X86] canonicalizeShuffleMaskWithHorizOp - minor refactor to support multiple src ops. NFCI.

2021-01-13 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-13T13:59:56Z
New Revision: 0a0ee7f5a5af0f5dae65452f649ab665e787e7d6

URL: 
https://github.com/llvm/llvm-project/commit/0a0ee7f5a5af0f5dae65452f649ab665e787e7d6
DIFF: 
https://github.com/llvm/llvm-project/commit/0a0ee7f5a5af0f5dae65452f649ab665e787e7d6.diff

LOG: [X86] canonicalizeShuffleMaskWithHorizOp - minor refactor to support 
multiple src ops. NFCI.

canonicalizeShuffleMaskWithHorizOp currently only supports shuffles with 1 or 2 
sources, but PR41813 will require us to support higher numbers of sources.

This patch just generalizes the initial setup stages to ensure all src ops are
the same type and opcode; it still early-outs if we have more than 2 sources.

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5949782f3c0c..821cfc5f0c27 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36088,20 +36088,20 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
 MutableArrayRef Ops, MutableArrayRef Mask,
 unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG,
 const X86Subtarget &Subtarget) {
-
-  // Combine binary shuffle of 2 similar 'Horizontal' instructions into a
-  // single instruction. Attempt to match a v2X64 repeating shuffle pattern 
that
-  // represents the LHS/RHS inputs for the lower/upper halves.
-  if (Mask.empty() || Ops.empty() || 2 < Ops.size())
+  if (Mask.empty() || Ops.empty())
 return SDValue();
 
-  SDValue BC0 = peekThroughBitcasts(Ops.front());
-  SDValue BC1 = peekThroughBitcasts(Ops.back());
+  SmallVector BC;
+  for (SDValue Op : Ops)
+BC.push_back(peekThroughBitcasts(Op));
+
+  // All ops must be the same horizop + type.
+  SDValue BC0 = BC[0];
   EVT VT0 = BC0.getValueType();
-  EVT VT1 = BC1.getValueType();
   unsigned Opcode0 = BC0.getOpcode();
-  unsigned Opcode1 = BC1.getOpcode();
-  if (Opcode0 != Opcode1 || VT0 != VT1 || VT0.getSizeInBits() != 
RootSizeInBits)
+  if (VT0.getSizeInBits() != RootSizeInBits || llvm::any_of(BC, [&](SDValue V) 
{
+return V.getOpcode() != Opcode0 || V.getValueType() != VT0;
+  }))
 return SDValue();
 
   bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
@@ -36110,12 +36110,16 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
   if (!isHoriz && !isPack)
 return SDValue();
 
-  if (Mask.size() == VT0.getVectorNumElements()) {
-int NumElts = VT0.getVectorNumElements();
-int NumLanes = VT0.getSizeInBits() / 128;
-int NumEltsPerLane = NumElts / NumLanes;
-int NumHalfEltsPerLane = NumEltsPerLane / 2;
+  int NumElts = VT0.getVectorNumElements();
+  int NumLanes = VT0.getSizeInBits() / 128;
+  int NumEltsPerLane = NumElts / NumLanes;
+  int NumHalfEltsPerLane = NumEltsPerLane / 2;
+
+  if (2 < Ops.size())
+return SDValue();
 
+  SDValue BC1 = BC[BC.size() - 1];
+  if (Mask.size() == VT0.getVectorNumElements()) {
 // Canonicalize binary shuffles of horizontal ops that use the
 // same sources to an unary shuffle.
 // TODO: Try to perform this fold even if the shuffle remains.
@@ -36159,6 +36163,9 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
 }
   }
 
+  // Combine binary shuffle of 2 similar 'Horizontal' instructions into a
+  // single instruction. Attempt to match a v2X64 repeating shuffle pattern 
that
+  // represents the LHS/RHS inputs for the lower/upper halves.
   unsigned EltSizeInBits = RootSizeInBits / Mask.size();
   SmallVector TargetMask128, WideMask128;
   if (isRepeatedTargetShuffleMask(128, EltSizeInBits, Mask, TargetMask128) &&



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] cbbfc82 - [X86][SSE] canonicalizeShuffleMaskWithHorizOp - simplify shuffle(HOP(HOP(X, Y), HOP(Z, W))) style chains.

2021-01-13 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-13T17:19:40Z
New Revision: cbbfc8258615bc971a54c6287abe33c4215d2eac

URL: 
https://github.com/llvm/llvm-project/commit/cbbfc8258615bc971a54c6287abe33c4215d2eac
DIFF: 
https://github.com/llvm/llvm-project/commit/cbbfc8258615bc971a54c6287abe33c4215d2eac.diff

LOG: [X86][SSE] canonicalizeShuffleMaskWithHorizOp - simplify 
shuffle(HOP(HOP(X,Y),HOP(Z,W))) style chains.

See if we can remove the shuffle by re-sorting a HOP chain so that the HOP
args are pre-shuffled.

This initial version just handles (the most common) v4i32/v4f32 hadd/hsub 
reduction patterns - future work can extend this to v8i16 types plus PACK 
chains (2f64 HADD/HSUB should already be handled in the half-lane combine code 
later on).
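
As a concrete illustration (editor's sketch in x86 intrinsics, not part of
the patch; compile with -msse3): for the v4f32 pairwise-sum case, re-sorting
the chain lets each inner hadd combine two different sources, so the final
shuffle disappears entirely. This matches the pair_sum_v4f32_v4f32 codegen
change in the test diff below.

  #include <immintrin.h>

  // Returns {sum(a), sum(b), sum(c), sum(d)} with three hadds, no shuffle.
  __m128 pair_sums(__m128 a, __m128 b, __m128 c, __m128 d) {
    __m128 ab = _mm_hadd_ps(a, b); // {a0+a1, a2+a3, b0+b1, b2+b3}
    __m128 cd = _mm_hadd_ps(c, d); // {c0+c1, c2+c3, d0+d1, d2+d3}
    return _mm_hadd_ps(ab, cd);    // {sum(a), sum(b), sum(c), sum(d)}
  }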

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/horizontal-sum.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 821cfc5f0c27..d45eb5366bfe 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36115,6 +36115,38 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
   int NumEltsPerLane = NumElts / NumLanes;
   int NumHalfEltsPerLane = NumEltsPerLane / 2;
 
+  // See if we can remove the shuffle by resorting the HOP chain so that
+  // the HOP args are pre-shuffled.
+  // TODO: Generalize to any sized/depth chain.
+  // TODO: Add support for PACKSS/PACKUS.
+  if (isHoriz && NumEltsPerLane == 4 && VT0.is128BitVector() &&
+  shouldUseHorizontalOp(Ops.size() == 1, DAG, Subtarget)) {
+SmallVector ScaledMask;
+if (scaleShuffleElements(Mask, 4, ScaledMask)) {
+  // Attempt to find a HOP(HOP(X,Y),HOP(Z,W)) source operand.
+  auto GetHOpSrc = [&](int M) {
+if (M == SM_SentinelUndef)
+  return DAG.getUNDEF(VT0);
+if (M == SM_SentinelZero)
+  return getZeroVector(VT0.getSimpleVT(), Subtarget, DAG, DL);
+SDValue Src0 = BC[M / NumElts];
+SDValue Src1 = Src0.getOperand((M % 4) >= 2);
+if (Src1.getOpcode() == Opcode0 && Src0->isOnlyUserOf(Src1.getNode()))
+  return Src1.getOperand(M % 2);
+return SDValue();
+  };
+  SDValue M0 = GetHOpSrc(ScaledMask[0]);
+  SDValue M1 = GetHOpSrc(ScaledMask[1]);
+  SDValue M2 = GetHOpSrc(ScaledMask[2]);
+  SDValue M3 = GetHOpSrc(ScaledMask[3]);
+  if (M0 && M1 && M2 && M3) {
+SDValue LHS = DAG.getNode(Opcode0, DL, VT0, M0, M1);
+SDValue RHS = DAG.getNode(Opcode0, DL, VT0, M2, M3);
+return DAG.getNode(Opcode0, DL, VT0, LHS, RHS);
+  }
+}
+  }
+
   if (2 < Ops.size())
 return SDValue();
 

diff  --git a/llvm/test/CodeGen/X86/horizontal-sum.ll 
b/llvm/test/CodeGen/X86/horizontal-sum.ll
index 47d44171d99a..315e795d7a37 100644
--- a/llvm/test/CodeGen/X86/horizontal-sum.ll
+++ b/llvm/test/CodeGen/X86/horizontal-sum.ll
@@ -38,13 +38,9 @@ define <4 x float> @pair_sum_v4f32_v4f32(<4 x float> %0, <4 
x float> %1, <4 x fl
 ;
 ; SSSE3-FAST-LABEL: pair_sum_v4f32_v4f32:
 ; SSSE3-FAST:   # %bb.0:
-; SSSE3-FAST-NEXT:haddps %xmm0, %xmm0
-; SSSE3-FAST-NEXT:haddps %xmm1, %xmm1
 ; SSSE3-FAST-NEXT:haddps %xmm1, %xmm0
-; SSSE3-FAST-NEXT:haddps %xmm2, %xmm2
-; SSSE3-FAST-NEXT:haddps %xmm3, %xmm3
-; SSSE3-FAST-NEXT:haddps %xmm2, %xmm3
-; SSSE3-FAST-NEXT:shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[2,0]
+; SSSE3-FAST-NEXT:haddps %xmm3, %xmm2
+; SSSE3-FAST-NEXT:haddps %xmm2, %xmm0
 ; SSSE3-FAST-NEXT:retq
 ;
 ; AVX1-SLOW-LABEL: pair_sum_v4f32_v4f32:
@@ -66,18 +62,12 @@ define <4 x float> @pair_sum_v4f32_v4f32(<4 x float> %0, <4 
x float> %1, <4 x fl
 ; AVX1-SLOW-NEXT:vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
 ; AVX1-SLOW-NEXT:retq
 ;
-; AVX1-FAST-LABEL: pair_sum_v4f32_v4f32:
-; AVX1-FAST:   # %bb.0:
-; AVX1-FAST-NEXT:vhaddps %xmm0, %xmm0, %xmm0
-; AVX1-FAST-NEXT:vhaddps %xmm1, %xmm1, %xmm1
-; AVX1-FAST-NEXT:vhaddps %xmm1, %xmm0, %xmm0
-; AVX1-FAST-NEXT:vhaddps %xmm2, %xmm2, %xmm1
-; AVX1-FAST-NEXT:vhaddps %xmm1, %xmm1, %xmm1
-; AVX1-FAST-NEXT:vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,1]
-; AVX1-FAST-NEXT:vhaddps %xmm3, %xmm3, %xmm1
-; AVX1-FAST-NEXT:vhaddps %xmm1, %xmm1, %xmm1
-; AVX1-FAST-NEXT:vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
-; AVX1-FAST-NEXT:retq
+; AVX-FAST-LABEL: pair_sum_v4f32_v4f32:
+; AVX-FAST:   # %bb.0:
+; AVX-FAST-NEXT:vhaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:vhaddps %xmm3, %xmm2, %xmm1
+; AVX-FAST-NEXT:vhaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:retq
 ;
 ; AVX2-SLOW-LABEL: pair_sum_v4f32_v4f32:
 ; AVX2-SLOW:   # %bb.0:
@@ -97,19 +87,6 @@ define <4 x float> @pair_sum_v4f32_v4f32(<4 x float> %0, <4 
x float> %1, <4 x fl
 ; AVX2-SLOW-NEXT:vaddps %xmm2, %xmm1, %xmm1
 ; AVX2-SLOW-NEXT:vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
 ; AVX2-SL

[llvm-branch-commits] [llvm] efb6e45 - [X86][AVX] Add test for another 'reverse HADD' pattern mentioned in PR41813

2021-01-13 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-13T17:19:41Z
New Revision: efb6e45d2be8e3e0843bdc4c2766e6910083c08e

URL: 
https://github.com/llvm/llvm-project/commit/efb6e45d2be8e3e0843bdc4c2766e6910083c08e
DIFF: 
https://github.com/llvm/llvm-project/commit/efb6e45d2be8e3e0843bdc4c2766e6910083c08e.diff

LOG: [X86][AVX] Add test for another 'reverse HADD' pattern mentioned in PR41813

Added: 


Modified: 
llvm/test/CodeGen/X86/haddsub-4.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/haddsub-4.ll 
b/llvm/test/CodeGen/X86/haddsub-4.ll
index d0c62753f0d2..6003f98b9371 100644
--- a/llvm/test/CodeGen/X86/haddsub-4.ll
+++ b/llvm/test/CodeGen/X86/haddsub-4.ll
@@ -120,6 +120,38 @@ define <8 x float> @hadd_reverse2_v8f32(<8 x float> %a0, 
<8 x float> %a1) {
   ret <8 x float> %add
 }
 
+define <8 x float> @hadd_reverse3_v8f32(<8 x float> %a0, <8 x float> %a1) {
+; SSE-LABEL: hadd_reverse3_v8f32:
+; SSE:   # %bb.0:
+; SSE-NEXT:movaps %xmm0, %xmm4
+; SSE-NEXT:haddps %xmm2, %xmm4
+; SSE-NEXT:haddps %xmm3, %xmm1
+; SSE-NEXT:shufps {{.*#+}} xmm1 = xmm1[3,2,1,0]
+; SSE-NEXT:shufps {{.*#+}} xmm4 = xmm4[3,2,1,0]
+; SSE-NEXT:movaps %xmm1, %xmm0
+; SSE-NEXT:movaps %xmm4, %xmm1
+; SSE-NEXT:retq
+;
+; AVX1-LABEL: hadd_reverse3_v8f32:
+; AVX1:   # %bb.0:
+; AVX1-NEXT:vhaddps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:retq
+;
+; AVX2-LABEL: hadd_reverse3_v8f32:
+; AVX2:   # %bb.0:
+; AVX2-NEXT:vhaddps %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2-NEXT:vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX2-NEXT:retq
+  %shuf0 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> 
+  %shuf1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> 
+  %add = fadd <8 x float> %shuf0, %shuf1
+  %shuf2 = shufflevector <8 x float> %add, <8 x float> poison, <8 x i32> 
+  ret <8 x float> %shuf2
+}
+
 define <16 x i16> @hadd_reverse_v16i16(<16 x i16> %a0, <16 x i16> %a1) 
nounwind {
 ; SSE-LABEL: hadd_reverse_v16i16:
 ; SSE:   # %bb.0:



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 993c488 - [DAG] visitVECTOR_SHUFFLE - use all_of to check for all-undef shuffle mask. NFCI.

2021-01-13 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-13T17:19:41Z
New Revision: 993c488ed2b347011d9d71990af38a82aaf5bdf5

URL: 
https://github.com/llvm/llvm-project/commit/993c488ed2b347011d9d71990af38a82aaf5bdf5
DIFF: 
https://github.com/llvm/llvm-project/commit/993c488ed2b347011d9d71990af38a82aaf5bdf5.diff

LOG: [DAG] visitVECTOR_SHUFFLE - use all_of to check for all-undef shuffle 
mask. NFCI.

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d9bb4e4a98b..7e4ee3bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20901,11 +20901,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
 }
 
 // Check if all indices in Mask are Undef. In case, propagate Undef.
-bool isUndefMask = true;
-for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
-  isUndefMask &= Mask[i] < 0;
-
-if (isUndefMask)
+if (llvm::all_of(Mask, [](int M) { return M < 0; }))
   return DAG.getUNDEF(VT);
 
 if (!SV0.getNode())



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] af8d27a - [DAG] visitVECTOR_SHUFFLE - pull out shuffle merging code into lambda helper. NFCI.

2021-01-14 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-14T11:05:19Z
New Revision: af8d27a7a8266b89916b5e4db2b2fd97eb7d84e5

URL: 
https://github.com/llvm/llvm-project/commit/af8d27a7a8266b89916b5e4db2b2fd97eb7d84e5
DIFF: 
https://github.com/llvm/llvm-project/commit/af8d27a7a8266b89916b5e4db2b2fd97eb7d84e5.diff

LOG: [DAG] visitVECTOR_SHUFFLE - pull out shuffle merging code into lambda 
helper. NFCI.

Make it easier to reuse in a future patch.

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 24bc7fe7e0ad..f4c9b814b806 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20823,30 +20823,19 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   return DAG.getCommutedVectorShuffle(*SVN);
   }
 
-  // Try to fold according to rules:
-  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
-  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
-  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
-  // Don't try to fold shuffles with illegal type.
-  // Only fold if this shuffle is the only user of the other shuffle.
-  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
-  Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
-ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
-
+  // Compute the combined shuffle mask for a shuffle with SV0 as the first
+  // operand, and SV1 as the second operand.
+  // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask).
+  auto MergeInnerShuffle = [NumElts](ShuffleVectorSDNode *SVN,
+ ShuffleVectorSDNode *OtherSVN, SDValue N1,
+ SDValue &SV0, SDValue &SV1,
+ SmallVectorImpl<int> &Mask) -> bool {
 // Don't try to fold splats; they're likely to simplify somehow, or they
 // might be free.
-if (OtherSV->isSplat())
-  return SDValue();
-
-// The incoming shuffle must be of the same type as the result of the
-// current shuffle.
-assert(OtherSV->getOperand(0).getValueType() == VT &&
-   "Shuffle types don't match");
+if (OtherSVN->isSplat())
+  return false;
 
-SDValue SV0, SV1;
-SmallVector Mask;
-// Compute the combined shuffle mask for a shuffle with SV0 as the first
-// operand, and SV1 as the second operand.
+Mask.clear();
 for (unsigned i = 0; i != NumElts; ++i) {
   int Idx = SVN->getMaskElt(i);
   if (Idx < 0) {
@@ -20859,15 +20848,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   if (Idx < (int)NumElts) {
 // This shuffle index refers to the inner shuffle N0. Lookup the inner
 // shuffle mask to identify which vector is actually referenced.
-Idx = OtherSV->getMaskElt(Idx);
+Idx = OtherSVN->getMaskElt(Idx);
 if (Idx < 0) {
   // Propagate Undef.
   Mask.push_back(Idx);
   continue;
 }
-
-CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
-   : OtherSV->getOperand(1);
+CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
+  : OtherSVN->getOperand(1);
   } else {
 // This shuffle index references an element within N1.
 CurrentVec = N1;
@@ -20892,31 +20880,52 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
 
   // Bail out if we cannot convert the shuffle pair into a single shuffle.
   if (SV1.getNode() && SV1 != CurrentVec)
-return SDValue();
+return false;
 
   // Ok. CurrentVec is the right hand side.
   // Update the mask accordingly.
   SV1 = CurrentVec;
   Mask.push_back(Idx + NumElts);
 }
+return true;
+  };
 
-// Check if all indices in Mask are Undef. In case, propagate Undef.
-if (llvm::all_of(Mask, [](int M) { return M < 0; }))
-  return DAG.getUNDEF(VT);
+  // Try to fold according to rules:
+  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+  // Don't try to fold shuffles with illegal type.
+  // Only fold if this shuffle is the only user of the other shuffle.
+  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
+  Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+// The incoming shuffle must be of the same type as the result of the
+// current shuffle.
+assert(OtherSV->getOperand(0).getValueType() == VT &&
+   "Shuffle types don't match");
 
-if (!SV0.getNode())
-  SV0 = DAG.getUNDEF(VT);
-if (!SV1.getNode())
-  S

[llvm-branch-commits] [llvm] 8f1d7f3 - [X86] Improve sum-of-reductions v4f32 test coverage

2021-01-14 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-14T11:05:19Z
New Revision: 8f1d7f3753ca132b310bbb0e62c394cfa75daee5

URL: 
https://github.com/llvm/llvm-project/commit/8f1d7f3753ca132b310bbb0e62c394cfa75daee5
DIFF: 
https://github.com/llvm/llvm-project/commit/8f1d7f3753ca132b310bbb0e62c394cfa75daee5.diff

LOG: [X86] Improve sum-of-reductions v4f32 test coverage

Ensure that the v4f32 reductions use a -0.0f start value and add a fast-math
test variant.
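
For context (editor's note): -0.0f, not +0.0f, is the identity of IEEE fadd
under the default rounding mode, so it is the start value an in-order
reduction can fold away without fast-math:

  // -0.0f + x == x for every x, including x == -0.0f, whereas
  // +0.0f + (-0.0f) == +0.0f, so +0.0f is not a neutral start value.
  float reduce_fadd(const float *v, int n) {
    float acc = -0.0f; // neutral element for the fadd reduction
    for (int i = 0; i < n; ++i)
      acc += v[i];
    return acc;
  }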

Added: 


Modified: 
llvm/test/CodeGen/X86/horizontal-sum.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/horizontal-sum.ll 
b/llvm/test/CodeGen/X86/horizontal-sum.ll
index 315e795d7a37..a5b34c482474 100644
--- a/llvm/test/CodeGen/X86/horizontal-sum.ll
+++ b/llvm/test/CodeGen/X86/horizontal-sum.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3   | FileCheck %s 
--check-prefixes=SSSE3,SSSE3-SLOW
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops | FileCheck %s 
--check-prefixes=SSSE3,SSSE3-FAST
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s 
--check-prefixes=AVX,AVX-SLOW,AVX1-SLOW
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops   | FileCheck %s 
--check-prefixes=AVX,AVX-FAST,AVX1-FAST
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2| FileCheck %s 
--check-prefixes=AVX,AVX-SLOW,AVX2-SLOW
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops  | FileCheck %s 
--check-prefixes=AVX,AVX-FAST,AVX2-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3   | FileCheck %s 
--check-prefixes=SSSE3-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops | FileCheck %s 
--check-prefixes=SSSE3-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s 
--check-prefixes=AVX-SLOW,AVX1-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops   | FileCheck %s 
--check-prefixes=AVX-FAST,AVX1-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2| FileCheck %s 
--check-prefixes=AVX-SLOW,AVX2-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops  | FileCheck %s 
--check-prefixes=AVX-FAST,AVX2-FAST
 
 ; Vectorized Pairwise Sum Reductions
 ; e.g.
@@ -954,77 +954,137 @@ define <4 x i32> @sequential_sum_v4i32_v4i32(<4 x i32> 
%0, <4 x i32> %1, <4 x i3
 ; }
 
 define <4 x float> @reduction_sum_v4f32_v4f32(<4 x float> %0, <4 x float> %1, 
<4 x float> %2, <4 x float> %3) {
-; SSSE3-LABEL: reduction_sum_v4f32_v4f32:
-; SSSE3:   # %bb.0:
-; SSSE3-NEXT:movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; SSSE3-NEXT:movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
-; SSSE3-NEXT:addss %xmm4, %xmm5
-; SSSE3-NEXT:movaps %xmm0, %xmm6
-; SSSE3-NEXT:unpckhpd {{.*#+}} xmm6 = xmm6[1],xmm0[1]
-; SSSE3-NEXT:addss %xmm5, %xmm6
-; SSSE3-NEXT:shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSSE3-NEXT:addss %xmm6, %xmm0
-; SSSE3-NEXT:movshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
-; SSSE3-NEXT:addss %xmm4, %xmm5
-; SSSE3-NEXT:movaps %xmm1, %xmm6
-; SSSE3-NEXT:unpckhpd {{.*#+}} xmm6 = xmm6[1],xmm1[1]
-; SSSE3-NEXT:addss %xmm5, %xmm6
-; SSSE3-NEXT:shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
-; SSSE3-NEXT:addss %xmm6, %xmm1
-; SSSE3-NEXT:unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSSE3-NEXT:movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSSE3-NEXT:addss %xmm4, %xmm1
-; SSSE3-NEXT:movaps %xmm2, %xmm5
-; SSSE3-NEXT:unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm2[1]
-; SSSE3-NEXT:addss %xmm1, %xmm5
-; SSSE3-NEXT:shufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
-; SSSE3-NEXT:addss %xmm5, %xmm2
-; SSSE3-NEXT:movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
-; SSSE3-NEXT:addss %xmm4, %xmm1
-; SSSE3-NEXT:movaps %xmm3, %xmm4
-; SSSE3-NEXT:unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm3[1]
-; SSSE3-NEXT:addss %xmm1, %xmm4
-; SSSE3-NEXT:shufps {{.*#+}} xmm3 = xmm3[3,3,3,3]
-; SSSE3-NEXT:addss %xmm4, %xmm3
-; SSSE3-NEXT:unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSSE3-NEXT:movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSSE3-NEXT:retq
+; SSSE3-SLOW-LABEL: reduction_sum_v4f32_v4f32:
+; SSSE3-SLOW:   # %bb.0:
+; SSSE3-SLOW-NEXT:movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSSE3-SLOW-NEXT:addss %xmm0, %xmm4
+; SSSE3-SLOW-NEXT:movaps %xmm0, %xmm5
+; SSSE3-SLOW-NEXT:unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
+; SSSE3-SLOW-NEXT:addss %xmm4, %xmm5
+; SSSE3-SLOW-NEXT:shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; SSSE3-SLOW-NEXT:addss %xmm5, %xmm0
+; SSSE3-SLOW-NEXT:movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
+; SSSE3-SLOW-NEXT:addss %xmm1, %xmm4
+; SSSE3-SLOW-NEXT:movaps %xmm1, %xmm5
+; SSSE3-SLOW-NEXT:unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
+; SSSE3-SLOW-NEXT:addss %xmm4, %xmm5
+; SSSE3-SLOW-NEXT:shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]

[llvm-branch-commits] [llvm] 7c30c05 - [DAG] visitVECTOR_SHUFFLE - MergeInnerShuffle - reset shuffle ops and reorder early-out and second op matching. NFCI.

2021-01-14 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-14T11:55:20Z
New Revision: 7c30c05ff71d062f0b8a05b7c3c12ede2c285371

URL: 
https://github.com/llvm/llvm-project/commit/7c30c05ff71d062f0b8a05b7c3c12ede2c285371
DIFF: 
https://github.com/llvm/llvm-project/commit/7c30c05ff71d062f0b8a05b7c3c12ede2c285371.diff

LOG: [DAG] visitVECTOR_SHUFFLE - MergeInnerShuffle - reset shuffle ops and 
reorder early-out and second op matching. NFCI.

I'm hoping to reuse MergeInnerShuffle in some other folds - so ensure the 
candidate ops/mask are reset at the start of each run.

Also, move the second op matching before bailing to make it simpler to try to 
match other things afterward.

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f4c9b814b806..eaf9ad9ef6e2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20835,7 +20835,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
 if (OtherSVN->isSplat())
   return false;
 
+SV0 = SV1 = SDValue();
 Mask.clear();
+
 for (unsigned i = 0; i != NumElts; ++i) {
   int Idx = SVN->getMaskElt(i);
   if (Idx < 0) {
@@ -20877,15 +20879,16 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
 Mask.push_back(Idx);
 continue;
   }
+  if (!SV1.getNode() || SV1 == CurrentVec) {
+// Ok. CurrentVec is the right hand side.
+// Update the mask accordingly.
+SV1 = CurrentVec;
+Mask.push_back(Idx + NumElts);
+continue;
+  }
 
   // Bail out if we cannot convert the shuffle pair into a single shuffle.
-  if (SV1.getNode() && SV1 != CurrentVec)
-return false;
-
-  // Ok. CurrentVec is the right hand side.
-  // Update the mask accordingly.
-  SV1 = CurrentVec;
-  Mask.push_back(Idx + NumElts);
+  return false;
 }
 return true;
   };



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] e8622d2 - [Support] Add KnownBits::sextInReg exhaustive tests

2021-01-14 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-14T14:27:45Z
New Revision: e8622d27c0e3020177ff47ad57dd1e5371feb9cf

URL: 
https://github.com/llvm/llvm-project/commit/e8622d27c0e3020177ff47ad57dd1e5371feb9cf
DIFF: 
https://github.com/llvm/llvm-project/commit/e8622d27c0e3020177ff47ad57dd1e5371feb9cf.diff

LOG: [Support] Add KnownBits::sextInReg exhaustive tests

Requested by @foad in rG9cf4f493a72f
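
The reference model the exhaustive test checks against, as a standalone
helper (editor's sketch; refSextInReg is a hypothetical name):

  #include "llvm/ADT/APInt.h"
  using llvm::APInt;

  // Model sext-in-reg on a concrete value: shift the low FromBits up to
  // the top, then arithmetic-shift back so the field's sign bit is
  // replicated over the high bits.
  APInt refSextInReg(const APInt &N, unsigned FromBits) {
    unsigned ExtBits = N.getBitWidth() - FromBits;
    APInt Ext = N << ExtBits;
    Ext.ashrInPlace(ExtBits);
    return Ext;
  }

The test intersects this over every value compatible with a given KnownBits
to obtain the tightest provable One/Zero masks.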

Added: 


Modified: 
llvm/unittests/Support/KnownBitsTest.cpp

Removed: 




diff  --git a/llvm/unittests/Support/KnownBitsTest.cpp 
b/llvm/unittests/Support/KnownBitsTest.cpp
index ba587a1e2f65..991096098b8e 100644
--- a/llvm/unittests/Support/KnownBitsTest.cpp
+++ b/llvm/unittests/Support/KnownBitsTest.cpp
@@ -425,4 +425,24 @@ TEST(KnownBitsTest, SExtOrTrunc) {
   }
 }
 
+TEST(KnownBitsTest, SExtInReg) {
+  unsigned Bits = 4;
+  for (unsigned FromBits = 1; FromBits != Bits; ++FromBits) {
+ForeachKnownBits(Bits, [&](const KnownBits &Known) {
+  APInt CommonOne = APInt::getAllOnesValue(Bits);
+  APInt CommonZero = APInt::getAllOnesValue(Bits);
+  unsigned ExtBits = Bits - FromBits;
+  ForeachNumInKnownBits(Known, [&](const APInt &N) {
+APInt Ext = N << ExtBits;
+Ext.ashrInPlace(ExtBits);
+CommonOne &= Ext;
+CommonZero &= ~Ext;
+  });
+  KnownBits KnownSExtInReg = Known.sextInReg(FromBits);
+  EXPECT_EQ(CommonOne, KnownSExtInReg.One);
+  EXPECT_EQ(CommonZero, KnownSExtInReg.Zero);
+});
+  }
+}
+
 } // end anonymous namespace



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 0b46f19 - [Support] Ensure KnownBits::sextInReg can handle the src == dst sext-in-reg case.

2021-01-14 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-14T14:50:21Z
New Revision: 0b46f19a9ecd6215cffb51d19f2403c18b0226f5

URL: 
https://github.com/llvm/llvm-project/commit/0b46f19a9ecd6215cffb51d19f2403c18b0226f5
DIFF: 
https://github.com/llvm/llvm-project/commit/0b46f19a9ecd6215cffb51d19f2403c18b0226f5.diff

LOG: [Support] Ensure KnownBits::sextInReg can handle the src == dst 
sext-in-reg case.

This was triggering assertions inside APInt::zext because we were extending to
the same bitwidth.
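
A minimal reproducer of the fixed edge case (editor's sketch):

  #include "llvm/Support/KnownBits.h"
  using llvm::KnownBits;

  void demo() {
    // Sign-extending "in register" from the full width is a no-op; before
    // this fix the call reached APInt::zext with NewWidth == BitWidth,
    // which asserts. Now it simply returns *this.
    KnownBits Known(8);
    KnownBits Same = Known.sextInReg(8); // valid: identical to Known
  }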

Added: 


Modified: 
llvm/lib/Support/KnownBits.cpp
llvm/unittests/Support/KnownBitsTest.cpp

Removed: 




diff  --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index 0f36c6a9ef1d..a46a90bb97d4 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -85,7 +85,11 @@ KnownBits KnownBits::computeForAddSub(bool Add, bool NSW,
 
 KnownBits KnownBits::sextInReg(unsigned SrcBitWidth) const {
   unsigned BitWidth = getBitWidth();
-  assert(BitWidth >= SrcBitWidth && "Illegal sext-in-register");
+  assert(0 < SrcBitWidth && SrcBitWidth <= BitWidth &&
+ "Illegal sext-in-register");
+
+  if (SrcBitWidth == BitWidth)
+return *this;
 
   // Sign extension.  Compute the demanded bits in the result that are not
   // present in the input.

diff  --git a/llvm/unittests/Support/KnownBitsTest.cpp 
b/llvm/unittests/Support/KnownBitsTest.cpp
index 991096098b8e..4e69df49837e 100644
--- a/llvm/unittests/Support/KnownBitsTest.cpp
+++ b/llvm/unittests/Support/KnownBitsTest.cpp
@@ -427,7 +427,7 @@ TEST(KnownBitsTest, SExtOrTrunc) {
 
 TEST(KnownBitsTest, SExtInReg) {
   unsigned Bits = 4;
-  for (unsigned FromBits = 1; FromBits != Bits; ++FromBits) {
+  for (unsigned FromBits = 1; FromBits <= Bits; ++FromBits) {
 ForeachKnownBits(Bits, [&](const KnownBits &Known) {
   APInt CommonOne = APInt::getAllOnesValue(Bits);
   APInt CommonZero = APInt::getAllOnesValue(Bits);



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] c0939fd - [Support] Simplify KnownBits::sextInReg implementation.

2021-01-14 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-14T15:14:32Z
New Revision: c0939fddf80c16829502186e2e5b78f77696310a

URL: 
https://github.com/llvm/llvm-project/commit/c0939fddf80c16829502186e2e5b78f77696310a
DIFF: 
https://github.com/llvm/llvm-project/commit/c0939fddf80c16829502186e2e5b78f77696310a.diff

LOG: [Support] Simplify KnownBits::sextInReg implementation.

As noted by @foad in rG9cf4f493a72f, all we need to do is sext-in-reg both the
KnownBits One and Zero masks.
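
A worked example of the shift pair (editor's sketch; 4-bit width,
sign-extending from 2 bits):

  #include "llvm/ADT/APInt.h"
  #include "llvm/Support/KnownBits.h"
  using llvm::APInt;
  using llvm::KnownBits;

  void demo() {
    KnownBits Known(4);
    Known.One  = APInt(4, 0b0010); // bit 1 (the 2-bit field's sign) known 1
    Known.Zero = APInt(4, 0b0001); // bit 0 known 0
    KnownBits Ext = Known.sextInReg(2);
    // (One << 2).ashr(2) == 0b1110 and (Zero << 2).ashr(2) == 0b0001:
    // the known field 0b10 (-2) is replicated into bits 2..3, so the
    // 4-bit result is fully known to be 0b1110 (-2).
  }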

Added: 


Modified: 
llvm/lib/Support/KnownBits.cpp

Removed: 




diff  --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index a46a90bb97d4..3623a54ae476 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -91,34 +91,12 @@ KnownBits KnownBits::sextInReg(unsigned SrcBitWidth) const {
   if (SrcBitWidth == BitWidth)
 return *this;
 
-  // Sign extension.  Compute the demanded bits in the result that are not
-  // present in the input.
-  APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
-
-  // If the sign extended bits are demanded, we know that the sign
-  // bit is demanded.
-  APInt InSignMask = APInt::getSignMask(SrcBitWidth).zext(BitWidth);
-  APInt InDemandedBits = APInt::getLowBitsSet(BitWidth, SrcBitWidth);
-  if (NewBits.getBoolValue())
-InDemandedBits |= InSignMask;
-
+  unsigned ExtBits = BitWidth - SrcBitWidth;
   KnownBits Result;
-  Result.One = One & InDemandedBits;
-  Result.Zero = Zero & InDemandedBits;
-
-  // If the sign bit of the input is known set or clear, then we know the
-  // top bits of the result.
-  if (Result.Zero.intersects(InSignMask)) { // Input sign bit known clear
-Result.Zero |= NewBits;
-Result.One &= ~NewBits;
-  } else if (Result.One.intersects(InSignMask)) { // Input sign bit known set
-Result.One |= NewBits;
-Result.Zero &= ~NewBits;
-  } else { // Input sign bit unknown
-Result.Zero &= ~NewBits;
-Result.One &= ~NewBits;
-  }
-
+  Result.One = One << ExtBits;
+  Result.Zero = Zero << ExtBits;
+  Result.One.ashrInPlace(ExtBits);
+  Result.Zero.ashrInPlace(ExtBits);
   return Result;
 }
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 0a59647 - [SystemZ] misched-cutoff tests can only be tested on non-NDEBUG (assertion) builds

2021-01-14 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-14T15:46:27Z
New Revision: 0a59647ee407524e6468cc5be4ba288861aa700d

URL: 
https://github.com/llvm/llvm-project/commit/0a59647ee407524e6468cc5be4ba288861aa700d
DIFF: 
https://github.com/llvm/llvm-project/commit/0a59647ee407524e6468cc5be4ba288861aa700d.diff

LOG: [SystemZ] misched-cutoff tests can only be tested on non-NDEBUG 
(assertion) builds

Fixes clang-with-thin-lto-ubuntu buildbot after D94383/rGddd03842c347

Added: 


Modified: 
llvm/test/CodeGen/SystemZ/misched-cutoff.ll

Removed: 




diff  --git a/llvm/test/CodeGen/SystemZ/misched-cutoff.ll 
b/llvm/test/CodeGen/SystemZ/misched-cutoff.ll
index 0de80a22c301..859c7398f2cd 100644
--- a/llvm/test/CodeGen/SystemZ/misched-cutoff.ll
+++ b/llvm/test/CodeGen/SystemZ/misched-cutoff.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -misched-cutoff=1 -o /dev/null < 
%s
-;
+; REQUIRES: asserts
+; -misched-cutoff isn't available in NDEBUG builds!
+
 ; Test that the post-ra scheduler does not crash with -misched-cutoff.
 
 @g_184 = external dso_local global i16, align 2



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] d0dbb04 - [Support] Remove redundant sign bit tests from KnownBits::getSignedMinValue/getSignedMaxValue

2021-01-14 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-14T15:46:26Z
New Revision: d0dbb0468c26bafa88e7340781fb3a0a79379470

URL: 
https://github.com/llvm/llvm-project/commit/d0dbb0468c26bafa88e7340781fb3a0a79379470
DIFF: 
https://github.com/llvm/llvm-project/commit/d0dbb0468c26bafa88e7340781fb3a0a79379470.diff

LOG: [Support] Remove redundant sign bit tests from 
KnownBits::getSignedMinValue/getSignedMaxValue

As noted by @foad on rG6895581fd2c1: Min starts as One, so a known-one sign
bit is already set in it (and in Max = ~Zero, a known-zero sign bit is already
clear), leaving only the opposite mask to test before flipping the unknown
sign bit.

Added: 


Modified: 
llvm/include/llvm/Support/KnownBits.h

Removed: 




diff  --git a/llvm/include/llvm/Support/KnownBits.h 
b/llvm/include/llvm/Support/KnownBits.h
index edb771d659e2..d854aadbd430 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -125,7 +125,7 @@ struct KnownBits {
 // Assume that all bits that aren't known-ones are zeros.
 APInt Min = One;
 // Sign bit is unknown.
-if (Zero.isSignBitClear() && One.isSignBitClear())
+if (Zero.isSignBitClear())
   Min.setSignBit();
 return Min;
   }
@@ -141,7 +141,7 @@ struct KnownBits {
 // Assume that all bits that aren't known-zeros are ones.
 APInt Max = ~Zero;
 // Sign bit is unknown.
-if (Zero.isSignBitClear() && One.isSignBitClear())
+if (One.isSignBitClear())
   Max.clearSignBit();
 return Max;
   }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] b99782c - [X86][AVX] Adjust unsigned saturation downconvert negative test

2021-01-14 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-14T17:51:23Z
New Revision: b99782cf7850a481fa36fd95ae04923739e0da6d

URL: 
https://github.com/llvm/llvm-project/commit/b99782cf7850a481fa36fd95ae04923739e0da6d
DIFF: 
https://github.com/llvm/llvm-project/commit/b99782cf7850a481fa36fd95ae04923739e0da6d.diff

LOG: [X86][AVX] Adjust unsigned saturation downconvert negative test

D87145 showed that this test (added in D45315) could always be constant
folded (with suitable value tracking).

What we actually needed was smax(smin()) negative test coverage, the invert of 
negative_test2_smax_usat_trunc_wb_256_mem, so I've tweaked the test to provide 
that instead.
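
For orientation, the foldable pattern the negative tests must not match
(editor's scalar sketch; the actual test uses <16 x i16> vectors with clamp
constants deliberately chosen so the combine cannot fire):

  #include <cstdint>

  // Canonical unsigned-saturating truncate built from signed clamps:
  // values are forced into [0, 255] before the narrowing cast.
  uint8_t usat_trunc(int16_t x) {
    int16_t lo = x < 255 ? x : 255; // smin(x, 255)
    int16_t hi = lo > 0 ? lo : 0;   // smax(smin(x, 255), 0)
    return (uint8_t)hi;
  }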

Added: 


Modified: 
llvm/test/CodeGen/X86/avx512-trunc.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/avx512-trunc.ll 
b/llvm/test/CodeGen/X86/avx512-trunc.ll
index 0b2a47c2772c..d61ada4e5d05 100644
--- a/llvm/test/CodeGen/X86/avx512-trunc.ll
+++ b/llvm/test/CodeGen/X86/avx512-trunc.ll
@@ -1007,10 +1007,8 @@ define <16 x i16> @smax_usat_trunc_dw_512(<16 x i32> %i) 
{
 define void @negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x 
i8>* %res) {
 ; KNL-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
 ; KNL:   ## %bb.0:
-; KNL-NEXT:vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT:vpmaxsw %ymm1, %ymm0, %ymm0
-; KNL-NEXT:vpcmpeqd %ymm1, %ymm1, %ymm1
-; KNL-NEXT:vpminsw %ymm1, %ymm0, %ymm0
+; KNL-NEXT:vpminsw {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT:vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
 ; KNL-NEXT:vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
 ; KNL-NEXT:vpmovdb %zmm0, (%rdi)
 ; KNL-NEXT:vzeroupper
@@ -1018,17 +1016,15 @@ define void 
@negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>*
 ;
 ; SKX-LABEL: negative_test1_smax_usat_trunc_wb_256_mem:
 ; SKX:   ## %bb.0:
-; SKX-NEXT:vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT:vpmaxsw %ymm1, %ymm0, %ymm0
-; SKX-NEXT:vpcmpeqd %ymm1, %ymm1, %ymm1
-; SKX-NEXT:vpminsw %ymm1, %ymm0, %ymm0
+; SKX-NEXT:vpminsw {{.*}}(%rip), %ymm0, %ymm0
+; SKX-NEXT:vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
 ; SKX-NEXT:vpmovwb %ymm0, (%rdi)
 ; SKX-NEXT:vzeroupper
 ; SKX-NEXT:retq
-  %x1 = icmp sgt <16 x i16> %i, 
-  %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> 
-  %x3 = icmp slt <16 x i16> %x2, 
-  %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> 
+  %x1 = icmp slt <16 x i16> %i, 
+  %x2 = select <16 x i1> %x1, <16 x i16> %i, <16 x i16> 
+  %x3 = icmp sgt <16 x i16> %x2, 
+  %x5 = select <16 x i1> %x3, <16 x i16> %x2, <16 x i16> 
   %x6 = trunc <16 x i16> %x5 to <16 x i8>
   store <16 x i8> %x6, <16 x i8>* %res, align 1
   ret void



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 1dfd5c9 - [X86][AVX] combineHorizOpWithShuffle - support target shuffles in HOP(SHUFFLE(X, Y), SHUFFLE(X, Y)) -> SHUFFLE(HOP(X, Y))

2021-01-15 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-15T13:55:30Z
New Revision: 1dfd5c9ad8cf677fb4c9c3ccf39d7b1f20c397d3

URL: 
https://github.com/llvm/llvm-project/commit/1dfd5c9ad8cf677fb4c9c3ccf39d7b1f20c397d3
DIFF: 
https://github.com/llvm/llvm-project/commit/1dfd5c9ad8cf677fb4c9c3ccf39d7b1f20c397d3.diff

LOG: [X86][AVX] combineHorizOpWithShuffle - support target shuffles in 
HOP(SHUFFLE(X,Y),SHUFFLE(X,Y)) -> SHUFFLE(HOP(X,Y))

Be more aggressive on (AVX2+) folds of lane shuffles of 256-bit horizontal ops 
by working on target/faux shuffles as well.
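
The avx_vhadd_pd_test change below, restated as intrinsics (editor's sketch;
hadd_lanes is a hypothetical helper, compile with -mavx2):

  #include <immintrin.h>

  // Old codegen shuffled the 128-bit lanes of both inputs before the hadd;
  // new codegen does one hadd and permutes its lanes afterwards.
  __m256d hadd_lanes(__m256d a, __m256d b) {
    __m256d h = _mm256_hadd_pd(a, b); // {a0+a1, b0+b1, a2+a3, b2+b3}
    return _mm256_permute4x64_pd(h, _MM_SHUFFLE(3, 1, 2, 0)); // lanes 0,2,1,3
  }

The cross-lane vpermpd requires AVX2, which is why the fold is gated on
Subtarget.hasInt256().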

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/haddsub-2.ll
llvm/test/CodeGen/X86/haddsub-undef.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d45eb5366bfe..a84250782c19 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43114,30 +43114,32 @@ static SDValue combineHorizOpWithShuffle(SDNode *N, 
SelectionDAG &DAG,
   // Attempt to fold HOP(SHUFFLE(X,Y),SHUFFLE(X,Y)) -> SHUFFLE(HOP(X,Y)).
   // TODO: Relax shuffle scaling to support sub-128-bit subvector shuffles.
   if (VT.is256BitVector() && Subtarget.hasInt256()) {
-if (auto *SVN0 = dyn_cast(N0)) {
-  if (auto *SVN1 = dyn_cast(N1)) {
-SmallVector ShuffleMask0, ShuffleMask1;
-if (scaleShuffleElements(SVN0->getMask(), 2, ShuffleMask0) &&
-scaleShuffleElements(SVN1->getMask(), 2, ShuffleMask1)) {
-  SDValue Op00 = SVN0->getOperand(0);
-  SDValue Op01 = SVN0->getOperand(1);
-  SDValue Op10 = SVN1->getOperand(0);
-  SDValue Op11 = SVN1->getOperand(1);
-  if ((Op00 == Op11) && (Op01 == Op10)) {
-std::swap(Op10, Op11);
-ShuffleVectorSDNode::commuteMask(ShuffleMask1);
-  }
-  if ((Op00 == Op10) && (Op01 == Op11)) {
-SmallVector ShuffleMask;
-ShuffleMask.append(ShuffleMask0.begin(), ShuffleMask0.end());
-ShuffleMask.append(ShuffleMask1.begin(), ShuffleMask1.end());
-SDLoc DL(N);
-MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
-SDValue Res = DAG.getNode(Opcode, DL, VT, Op00, Op01);
-Res = DAG.getBitcast(ShufVT, Res);
-Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ShuffleMask);
-return DAG.getBitcast(VT, Res);
-  }
+SmallVector Mask0, Mask1;
+SmallVector Ops0, Ops1;
+if (getTargetShuffleInputs(N0, Ops0, Mask0, DAG) && !isAnyZero(Mask0) &&
+getTargetShuffleInputs(N1, Ops1, Mask1, DAG) && !isAnyZero(Mask1) &&
+!Ops0.empty() && !Ops1.empty()) {
+  SDValue Op00 = Ops0.front(), Op01 = Ops0.back();
+  SDValue Op10 = Ops1.front(), Op11 = Ops1.back();
+  SmallVector ShuffleMask0, ShuffleMask1;
+  if (Op00.getValueType() == SrcVT && Op01.getValueType() == SrcVT &&
+  Op11.getValueType() == SrcVT && Op11.getValueType() == SrcVT &&
+  scaleShuffleElements(Mask0, 2, ShuffleMask0) &&
+  scaleShuffleElements(Mask1, 2, ShuffleMask1)) {
+if ((Op00 == Op11) && (Op01 == Op10)) {
+  std::swap(Op10, Op11);
+  ShuffleVectorSDNode::commuteMask(ShuffleMask1);
+}
+if ((Op00 == Op10) && (Op01 == Op11)) {
+  SmallVector ShuffleMask;
+  ShuffleMask.append(ShuffleMask0.begin(), ShuffleMask0.end());
+  ShuffleMask.append(ShuffleMask1.begin(), ShuffleMask1.end());
+  SDLoc DL(N);
+  MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
+  SDValue Res = DAG.getNode(Opcode, DL, VT, Op00, Op01);
+  Res = DAG.getBitcast(ShufVT, Res);
+  Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ShuffleMask);
+  return DAG.getBitcast(VT, Res);
 }
   }
 }

diff  --git a/llvm/test/CodeGen/X86/haddsub-2.ll 
b/llvm/test/CodeGen/X86/haddsub-2.ll
index a025604f44a5..82fd7a2699a5 100644
--- a/llvm/test/CodeGen/X86/haddsub-2.ll
+++ b/llvm/test/CodeGen/X86/haddsub-2.ll
@@ -444,12 +444,18 @@ define <4 x double> @avx_vhadd_pd_test(<4 x double> %A, 
<4 x double> %B) {
 ; SSE-NEXT:movapd %xmm2, %xmm1
 ; SSE-NEXT:retq
 ;
-; AVX-LABEL: avx_vhadd_pd_test:
-; AVX:   # %bb.0:
-; AVX-NEXT:vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
-; AVX-NEXT:vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX-NEXT:vhaddpd %ymm2, %ymm0, %ymm0
-; AVX-NEXT:retq
+; AVX1-LABEL: avx_vhadd_pd_test:
+; AVX1:   # %bb.0:
+; AVX1-NEXT:vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX1-NEXT:vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:vhaddpd %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:retq
+;
+; AVX2-LABEL: avx_vhadd_pd_test:
+; AVX2:   # %bb.0:
+; AVX2-NEXT:vhaddpd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:retq
   %vecext =

[llvm-branch-commits] [llvm] 5183a13 - [X86] Add umin knownbits/demandedbits ult test for D94532

2021-01-15 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-15T14:42:55Z
New Revision: 5183a13d37825f93d92c23c257dbb1c994098bdc

URL: 
https://github.com/llvm/llvm-project/commit/5183a13d37825f93d92c23c257dbb1c994098bdc
DIFF: 
https://github.com/llvm/llvm-project/commit/5183a13d37825f93d92c23c257dbb1c994098bdc.diff

LOG: [X86] Add umin knownbits/demandedbits ult test for D94532

Added: 


Modified: 
llvm/test/CodeGen/X86/combine-umin.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/combine-umin.ll 
b/llvm/test/CodeGen/X86/combine-umin.ll
index 558d4df9adb4..b22c45bbce45 100644
--- a/llvm/test/CodeGen/X86/combine-umin.ll
+++ b/llvm/test/CodeGen/X86/combine-umin.ll
@@ -1,11 +1,33 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s 
--check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s 
--check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s 
--check-prefix=SSE42
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s 
--check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s 
--check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s 
--check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck 
%s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s 
--check-prefixes=CHECK,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s 
--check-prefixes=CHECK,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s 
--check-prefixes=CHECK,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s 
--check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s 
--check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s 
--check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck 
%s --check-prefixes=CHECK,AVX
+
+define i8 @test_demandedbits_umin_ult(i8 %a0, i8 %a1) {
+; CHECK-LABEL: test_demandedbits_umin_ult:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:orb $12, %dil
+; CHECK-NEXT:orb $4, %sil
+; CHECK-NEXT:andb $13, %dil
+; CHECK-NEXT:andb $12, %sil
+; CHECK-NEXT:movzbl %dil, %ecx
+; CHECK-NEXT:movzbl %sil, %eax
+; CHECK-NEXT:cmpb %al, %cl
+; CHECK-NEXT:cmovbl %ecx, %eax
+; CHECK-NEXT:# kill: def $al killed $al killed $eax
+; CHECK-NEXT:retq
+  %lhs0 = and i8 %a0, 13  ; b1101
+  %rhs0 = and i8 %a1, 12  ; b1100
+  %lhs1 = or i8 %lhs0, 12 ; b1100
+  %rhs1 = or i8 %rhs0, 4  ; b0100
+  %umin = tail call i8 @llvm.umin.i8(i8 %lhs1, i8 %rhs1)
+  ret i8 %umin
+}
+declare i8 @llvm.umin.i8(i8, i8)
 
 define <8 x i16> @test_v8i16_nosignbit(<8 x i16> %a, <8 x i16> %b) {
 ; SSE2-LABEL: test_v8i16_nosignbit:





[llvm-branch-commits] [llvm] 46aa3c6 - [DAG] visitVECTOR_SHUFFLE - MergeInnerShuffle - improve shuffle(shuffle(x, y), shuffle(x, y)) merging

2021-01-15 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-15T15:08:31Z
New Revision: 46aa3c6c331c20b8a4c358488fefab96f944e814

URL: 
https://github.com/llvm/llvm-project/commit/46aa3c6c331c20b8a4c358488fefab96f944e814
DIFF: 
https://github.com/llvm/llvm-project/commit/46aa3c6c331c20b8a4c358488fefab96f944e814.diff

LOG: [DAG] visitVECTOR_SHUFFLE - MergeInnerShuffle - improve 
shuffle(shuffle(x,y),shuffle(x,y)) merging

MergeInnerShuffle currently attempts to merge shuffle(shuffle(x,y),z) patterns 
into a single shuffle, using 1 or 2 of the x,y,z ops.

However if we already match 2 ops we might be able to handle the third op if 
its also a shuffle that references one of the previous ops, allowing us to 
handle some cases like:

shuffle(shuffle(x,y),shuffle(x,y))
shuffle(shuffle(shuffle(x,z),y),z)
shuffle(shuffle(x,shuffle(x,y)),z)
etc.

This isn't an exhaustive match and is dependent on the order the candidate ops 
are encountered - if one of the matched ops was a shuffle that was peek-able we 
don't go back and try to split that; I haven't found much need for that amount 
of analysis yet.

This is a preliminary patch that will allow us to later improve x86 HADD/HSUB 
matching - but needs to be reviewed separately as it's in generic code and 
affects existing Thumb2 tests.

Differential Revision: https://reviews.llvm.org/D94671
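
A minimal standalone model of the peek-through merge in plain C++ - masks are
index vectors, indices 0..3 select x and 4..7 select y, and the 4-element
width and mask values are illustrative rather than taken from the patch:

  #include <cstdio>
  #include <vector>

  int main() {
    const int NumElts = 4;
    // Two inner shuffles of the same operands x and y.
    std::vector<int> Inner0 = {0, 4, 1, 5}; // shuffle(x, y)
    std::vector<int> Inner1 = {2, 6, 3, 7}; // shuffle(x, y)
    std::vector<int> Outer  = {0, 4, 2, 6}; // outer shuffle of the two results

    // Peek through whichever inner shuffle each outer lane refers to.
    std::vector<int> Merged;
    for (int M : Outer) {
      const std::vector<int> &Inner = (M < NumElts) ? Inner0 : Inner1;
      Merged.push_back(Inner[M % NumElts]); // already an index into x or y
    }
    for (int M : Merged)
      std::printf("%d ", M); // prints "0 2 1 3": a single shuffle of x and y
    std::printf("\n");
  }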

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
llvm/test/CodeGen/Thumb2/mve-vst2.ll

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index eaf9ad9ef6e2..e265bcea5945 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20887,6 +20887,32 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
 continue;
   }
 
+  // Last chance - see if the vector is another shuffle and if it
+  // uses one of the existing candidate shuffle ops.
+  if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
+int InnerIdx = CurrentSVN->getMaskElt(Idx);
+if (InnerIdx < 0) {
+  Mask.push_back(-1);
+  continue;
+}
+SDValue InnerVec = (InnerIdx < (int)NumElts)
+   ? CurrentSVN->getOperand(0)
+   : CurrentSVN->getOperand(1);
+if (InnerVec.isUndef()) {
+  Mask.push_back(-1);
+  continue;
+}
+InnerIdx %= NumElts;
+if (InnerVec == SV0) {
+  Mask.push_back(InnerIdx);
+  continue;
+}
+if (InnerVec == SV1) {
+  Mask.push_back(InnerIdx + NumElts);
+  continue;
+}
+  }
+
   // Bail out if we cannot convert the shuffle pair into a single shuffle.
   return false;
 }

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll 
b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
index bf1153fe0e26..411e90152e0e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
@@ -10,37 +10,26 @@ define arm_aapcs_vfpcc void @test32(i32* noalias nocapture 
readonly %x, i32* noa
 ; CHECK-NEXT:blt .LBB0_2
 ; CHECK-NEXT:  .LBB0_1: @ %vector.body
 ; CHECK-NEXT:@ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:vldrw.u32 q1, [r0], #16
-; CHECK-NEXT:vldrw.u32 q2, [r1], #16
+; CHECK-NEXT:vldrw.u32 q0, [r0], #16
+; CHECK-NEXT:vldrw.u32 q1, [r1], #16
 ; CHECK-NEXT:subs r3, #4
-; CHECK-NEXT:vmullt.s32 q0, q2, q1
-; CHECK-NEXT:vmullb.s32 q3, q2, q1
-; CHECK-NEXT:vmov r5, s3
-; CHECK-NEXT:vmov r12, s2
-; CHECK-NEXT:vmov r7, s1
+; CHECK-NEXT:vmullb.s32 q2, q1, q0
+; CHECK-NEXT:vmullt.s32 q3, q1, q0
+; CHECK-NEXT:vmov r5, s11
+; CHECK-NEXT:vmov r12, s10
 ; CHECK-NEXT:lsrl r12, r5, #31
-; CHECK-NEXT:vmov r4, s0
-; CHECK-NEXT:lsrl r4, r7, #31
-; CHECK-NEXT:vmov q0[2], q0[0], r4, r12
-; CHECK-NEXT:vmov r12, s14
-; CHECK-NEXT:vmov q0[3], q0[1], r7, r5
+; CHECK-NEXT:vmov r4, s8
+; CHECK-NEXT:vmov r5, s9
+; CHECK-NEXT:lsrl r4, r5, #31
+; CHECK-NEXT:vmov q2[2], q2[0], r4, r12
 ; CHECK-NEXT:vmov r5, s15
-; CHECK-NEXT:vmov r7, s13
+; CHECK-NEXT:vmov r12, s14
 ; CHECK-NEXT:lsrl r12, r5, #31
 ; CHECK-NEXT:vmov r4, s12
-; CHECK-NEXT:lsrl r4, r7, #31
-; CHECK-NEXT:vmov q1[2], q1[0], r4, r12
-; CHECK-NEXT:vmov q1[3], q1[1], r7, r5
-; CHECK-NEXT:vmov.f32 s8, s6
-; CHECK-NEXT:vmov.f32 s9, s7
-; CHECK-NEXT:vmov.f32 s6, s0
-; CHECK-NEXT:vmov.f32 s7, s1
-; CHECK-NEXT:vmov.f32 s10, s2
-; CHECK-NEXT:vmov.f32 s5, s6
-; CHECK-NEXT:vmov.f32 s11, s3
-; CHECK-NEXT:vmov.f32 s6, s8
-; CHECK-NEXT:vmov.f32 s7, s10
-; CHECK-NEXT:vstrb.8 q1, [r2], #16
+; CHECK-NEXT:vmov r5, s13
+; CHECK-NEXT:lsrl r4, r5, #31
+; CHECK-NEXT:vmov q2[3], q2[1], r4, r12

[llvm-branch-commits] [llvm] be69e66 - [X86][SSE] Attempt to fold shuffle(binop(), binop()) -> binop(shuffle(), shuffle())

2021-01-15 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-15T16:25:25Z
New Revision: be69e66b1cd826f499566e1c3dadbf04e872baa0

URL: 
https://github.com/llvm/llvm-project/commit/be69e66b1cd826f499566e1c3dadbf04e872baa0
DIFF: 
https://github.com/llvm/llvm-project/commit/be69e66b1cd826f499566e1c3dadbf04e872baa0.diff

LOG: [X86][SSE] Attempt to fold shuffle(binop(),binop()) -> 
binop(shuffle(),shuffle())

If this will help us fold shuffles together, then push the shuffle through the 
merged binops.

Ideally this would be performed in DAGCombiner::visitVECTOR_SHUFFLE but getting 
an efficient+legal merged shuffle can be tricky - on SSE we can be confident 
that for 32/64-bit element vectors the shuffles should easily fold.
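
The underlying identity is easy to check lane-by-lane. A self-contained sketch
using integer add as a stand-in for an arbitrary element-wise binop (the
4-lane width and mask are illustrative, not the DAG code itself):

  #include <array>
  #include <cassert>

  using V4 = std::array<int, 4>;

  static V4 shuffle(const V4 &L, const V4 &R, const std::array<int, 4> &M) {
    V4 Out{};
    for (int i = 0; i != 4; ++i)
      Out[i] = M[i] < 4 ? L[M[i]] : R[M[i] - 4];
    return Out;
  }

  static V4 add(const V4 &A, const V4 &B) {
    V4 Out{};
    for (int i = 0; i != 4; ++i)
      Out[i] = A[i] + B[i];
    return Out;
  }

  int main() {
    V4 A{1, 2, 3, 4}, B{5, 6, 7, 8}, C{9, 10, 11, 12}, D{13, 14, 15, 16};
    std::array<int, 4> Mask{0, 5, 2, 7};
    // shuffle(bop(A,B), bop(C,D)) == bop(shuffle(A,C), shuffle(B,D))
    assert(shuffle(add(A, B), add(C, D), Mask) ==
           add(shuffle(A, C, Mask), shuffle(B, D, Mask)));
  }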

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/haddsub-shuf.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a84250782c19..d2cc2395576a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37939,6 +37939,33 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG 
&DAG,
 
 if (SDValue HAddSub = foldShuffleOfHorizOp(N, DAG))
   return HAddSub;
+
+// Merge shuffles through binops if its likely we'll be able to merge it
+// with other shuffles.
+// shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
+// TODO: We might be able to move this to DAGCombiner::visitVECTOR_SHUFFLE.
+if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N)) {
+  unsigned SrcOpcode = N->getOperand(0).getOpcode();
+  if (SrcOpcode == N->getOperand(1).getOpcode() && TLI.isBinOp(SrcOpcode) 
&&
+  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
+  N->isOnlyUserOf(N->getOperand(1).getNode()) &&
+  VT.getScalarSizeInBits() >= 32) {
+SDValue Op00 = N->getOperand(0).getOperand(0);
+SDValue Op10 = N->getOperand(1).getOperand(0);
+SDValue Op01 = N->getOperand(0).getOperand(1);
+SDValue Op11 = N->getOperand(1).getOperand(1);
+if ((Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
+ Op10.getOpcode() == ISD::VECTOR_SHUFFLE) &&
+(Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
+ Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
+  SDLoc DL(N);
+  ArrayRef<int> Mask = SVN->getMask();
+  SDValue LHS = DAG.getVectorShuffle(VT, DL, Op00, Op10, Mask);
+  SDValue RHS = DAG.getVectorShuffle(VT, DL, Op01, Op11, Mask);
+  return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
+}
+  }
+}
   }
 
   // Attempt to combine into a vector load/broadcast.

diff  --git a/llvm/test/CodeGen/X86/haddsub-shuf.ll 
b/llvm/test/CodeGen/X86/haddsub-shuf.ll
index 9b2dfc1ce0cb..37eedcd54441 100644
--- a/llvm/test/CodeGen/X86/haddsub-shuf.ll
+++ b/llvm/test/CodeGen/X86/haddsub-shuf.ll
@@ -923,45 +923,15 @@ define <4 x float> @PR34724_1(<4 x float> %a, <4 x float> 
%b) {
 }
 
 define <4 x float> @PR34724_2(<4 x float> %a, <4 x float> %b) {
-; SSSE3_SLOW-LABEL: PR34724_2:
-; SSSE3_SLOW:   # %bb.0:
-; SSSE3_SLOW-NEXT:haddps %xmm1, %xmm0
-; SSSE3_SLOW-NEXT:movsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
-; SSSE3_SLOW-NEXT:addps %xmm1, %xmm2
-; SSSE3_SLOW-NEXT:shufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[2,0]
-; SSSE3_SLOW-NEXT:shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
-; SSSE3_SLOW-NEXT:retq
-;
-; SSSE3_FAST-LABEL: PR34724_2:
-; SSSE3_FAST:   # %bb.0:
-; SSSE3_FAST-NEXT:haddps %xmm1, %xmm0
-; SSSE3_FAST-NEXT:retq
-;
-; AVX1_SLOW-LABEL: PR34724_2:
-; AVX1_SLOW:   # %bb.0:
-; AVX1_SLOW-NEXT:vhaddps %xmm1, %xmm0, %xmm0
-; AVX1_SLOW-NEXT:vmovsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
-; AVX1_SLOW-NEXT:vaddps %xmm1, %xmm2, %xmm1
-; AVX1_SLOW-NEXT:vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
-; AVX1_SLOW-NEXT:retq
-;
-; AVX1_FAST-LABEL: PR34724_2:
-; AVX1_FAST:   # %bb.0:
-; AVX1_FAST-NEXT:vhaddps %xmm1, %xmm0, %xmm0
-; AVX1_FAST-NEXT:retq
-;
-; AVX2_SLOW-LABEL: PR34724_2:
-; AVX2_SLOW:   # %bb.0:
-; AVX2_SLOW-NEXT:vhaddps %xmm1, %xmm0, %xmm0
-; AVX2_SLOW-NEXT:vmovsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
-; AVX2_SLOW-NEXT:vaddps %xmm1, %xmm2, %xmm1
-; AVX2_SLOW-NEXT:vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
-; AVX2_SLOW-NEXT:retq
+; SSSE3-LABEL: PR34724_2:
+; SSSE3:   # %bb.0:
+; SSSE3-NEXT:haddps %xmm1, %xmm0
+; SSSE3-NEXT:retq
 ;
-; AVX2_FAST-LABEL: PR34724_2:
-; AVX2_FAST:   # %bb.0:
-; AVX2_FAST-NEXT:vhaddps %xmm1, %xmm0, %xmm0
-; AVX2_FAST-NEXT:retq
+; AVX-LABEL: PR34724_2:
+; AVX:   # %bb.0:
+; AVX-NEXT:vhaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:retq
   %t0 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> 
   %t1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> 
   %t2 = fadd <4 x float> %t0, %t1




[llvm-branch-commits] [llvm] 770d1e0 - [X86][SSE] isHorizontalBinOp - reuse any existing horizontal ops.

2021-01-18 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-18T10:14:45Z
New Revision: 770d1e0a8828010a7c95de4596e24d54ed2527c3

URL: 
https://github.com/llvm/llvm-project/commit/770d1e0a8828010a7c95de4596e24d54ed2527c3
DIFF: 
https://github.com/llvm/llvm-project/commit/770d1e0a8828010a7c95de4596e24d54ed2527c3.diff

LOG: [X86][SSE] isHorizontalBinOp - reuse any existing horizontal ops.

If we already have similar horizontal ops using the same args, then match that, 
even if we are on a target with slow horizontal ops.
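
A rough model of the heuristic, with Node/Users as stand-ins for the SDNode
use lists rather than the real SelectionDAG types:

  #include <cassert>
  #include <vector>

  struct Node {
    unsigned Opcode;
    std::vector<const Node *> Users;
  };

  static bool hasHorizUser(const Node &N, unsigned HOpcode) {
    for (const Node *U : N.Users)
      if (U->Opcode == HOpcode)
        return true;
    return false;
  }

  int main() {
    const unsigned HADD = 1;
    Node Existing{HADD, {}};
    Node LHS{0, {&Existing}}, RHS{0, {&Existing}};
    // Both inputs already feed an identical horizontal op, so accept the
    // match even on targets where horizontal ops are normally "slow".
    bool ForceHorizOp = hasHorizUser(LHS, HADD) && hasHorizUser(RHS, HADD);
    assert(ForceHorizOp);
  }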

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/haddsub-shuf.ll
llvm/test/CodeGen/X86/haddsub-undef.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6bee21747bce..78a5d4a6dfbf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45628,8 +45628,9 @@ static SDValue combineVEXTRACT_STORE(SDNode *N, 
SelectionDAG &DAG,
 /// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
 /// A horizontal-op B, for some already available A and B, and if so then LHS 
is
 /// set to A, RHS to B, and the routine returns 'true'.
-static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
-  const X86Subtarget &Subtarget, bool IsCommutative,
+static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS,
+  SelectionDAG &DAG, const X86Subtarget &Subtarget,
+  bool IsCommutative,
   SmallVectorImpl<int> &PostShuffleMask) {
   // If either operand is undef, bail out. The binop should be simplified.
   if (LHS.isUndef() || RHS.isUndef())
@@ -45790,9 +45791,20 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue 
&RHS, SelectionDAG &DAG,
   isMultiLaneShuffleMask(128, VT.getScalarSizeInBits(), PostShuffleMask))
 return false;
 
+  // If the source nodes are already used in HorizOps then always accept this.
+  // Shuffle folding should merge these back together.
+  bool FoundHorizLHS = llvm::any_of(NewLHS->uses(), [&](SDNode *User) {
+return User->getOpcode() == HOpcode && User->getValueType(0) == VT;
+  });
+  bool FoundHorizRHS = llvm::any_of(NewRHS->uses(), [&](SDNode *User) {
+return User->getOpcode() == HOpcode && User->getValueType(0) == VT;
+  });
+  bool ForceHorizOp = FoundHorizLHS && FoundHorizRHS;
+
   // Assume a SingleSource HOP if we only shuffle one input and don't need to
   // shuffle the result.
-  if (!shouldUseHorizontalOp(NewLHS == NewRHS &&
+  if (!ForceHorizOp &&
+  !shouldUseHorizontalOp(NewLHS == NewRHS &&
  (NumShuffles < 2 || !IsIdentityPostShuffle),
  DAG, Subtarget))
 return false;
@@ -45816,7 +45828,8 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG 
&DAG,
   SmallVector<int, 8> PostShuffleMask;
   if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
(Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
-  isHorizontalBinOp(LHS, RHS, DAG, Subtarget, IsFadd, PostShuffleMask)) {
+  isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsFadd,
+PostShuffleMask)) {
 SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
 if (!PostShuffleMask.empty())
   HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,
@@ -48931,17 +48944,18 @@ static SDValue combineAddOrSubToHADDorHSUB(SDNode *N, 
SelectionDAG &DAG,
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
   bool IsAdd = N->getOpcode() == ISD::ADD;
+  auto HorizOpcode = IsAdd ? X86ISD::HADD : X86ISD::HSUB;
   assert((IsAdd || N->getOpcode() == ISD::SUB) && "Wrong opcode");
 
   SmallVector<int, 8> PostShuffleMask;
   if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
VT == MVT::v8i32) &&
   Subtarget.hasSSSE3() &&
-  isHorizontalBinOp(Op0, Op1, DAG, Subtarget, IsAdd, PostShuffleMask)) {
-auto HOpBuilder = [IsAdd](SelectionDAG &DAG, const SDLoc &DL,
-  ArrayRef<SDValue> Ops) {
-  return DAG.getNode(IsAdd ? X86ISD::HADD : X86ISD::HSUB, DL,
- Ops[0].getValueType(), Ops);
+  isHorizontalBinOp(HorizOpcode, Op0, Op1, DAG, Subtarget, IsAdd,
+PostShuffleMask)) {
+auto HOpBuilder = [HorizOpcode](SelectionDAG &DAG, const SDLoc &DL,
+ArrayRef<SDValue> Ops) {
+  return DAG.getNode(HorizOpcode, DL, Ops[0].getValueType(), Ops);
 };
 SDValue HorizBinOp =
 SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, HOpBuilder);

diff  --git a/llvm/test/CodeGen/X86/haddsub-shuf.ll 
b/llvm/test/CodeGen/X86/haddsub-shuf.ll
index 37eedcd54441..282ef37f6e52 100644
--- a/llvm/test/CodeGen/X86/haddsub-shuf.ll
+++ b/llvm/test/CodeGen/X86/haddsub-shuf.ll

[llvm-branch-commits] [llvm] 207f329 - [DAG] SimplifyDemandedBits - use KnownBits comparisons to remove ISD::UMIN/UMAX ops

2021-01-18 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-18T10:29:23Z
New Revision: 207f32948b2408bebd5a523695f6f7c08049db74

URL: 
https://github.com/llvm/llvm-project/commit/207f32948b2408bebd5a523695f6f7c08049db74
DIFF: 
https://github.com/llvm/llvm-project/commit/207f32948b2408bebd5a523695f6f7c08049db74.diff

LOG: [DAG] SimplifyDemandedBits - use KnownBits comparisons to remove 
ISD::UMIN/UMAX ops

Use the KnownBits icmp comparisons to determine when an ISD::UMIN/UMAX op is
unnecessary: if either op is known to be ULT/ULE or UGT/UGE relative to the
other, the min/max can be replaced with that operand.

Differential Revision: https://reviews.llvm.org/D94532
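
A tiny 8-bit model of the reasoning, mirroring the values in the
test_demandedbits_umin_ult test; Known and ule here are simplified stand-ins
for llvm::KnownBits, not the real API:

  #include <cassert>
  #include <cstdint>
  #include <optional>

  struct Known {
    uint8_t Zero, One; // bits known to be 0 / known to be 1
    uint8_t umin() const { return One; }
    uint8_t umax() const { return uint8_t(~Zero); }
  };

  static std::optional<bool> ule(Known L, Known R) {
    if (L.umax() <= R.umin()) return true;  // always L <= R
    if (L.umin() > R.umax()) return false;  // never L <= R
    return std::nullopt;                    // can't tell
  }

  int main() {
    Known Lhs{0xF2, 0x0C}; // (a0 & 13) | 12 -> value is 12 or 13
    Known Rhs{0xF3, 0x04}; // (a1 & 12) | 4  -> value is 4 or 12
    // rhs <= lhs always holds, so umin(lhs, rhs) simplifies to rhs.
    assert(ule(Rhs, Lhs).value());
  }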

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
llvm/test/CodeGen/X86/combine-umin.ll
llvm/test/CodeGen/X86/sdiv_fix_sat.ll
llvm/test/CodeGen/X86/udiv_fix_sat.ll

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e265bcea5945..ef83df8bdd96 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4607,6 +4607,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
   return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
   }
 
+  // Simplify the operands using demanded-bits information.
+  if (SimplifyDemandedBits(SDValue(N, 0)))
+return SDValue(N, 0);
+
   return SDValue();
 }
 

diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 21953373b745..b19033e3e427 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1722,6 +1722,32 @@ bool TargetLowering::SimplifyDemandedBits(
 }
 break;
   }
+  case ISD::UMIN: {
+// Check if one arg is always less than (or equal) to the other arg.
+SDValue Op0 = Op.getOperand(0);
+SDValue Op1 = Op.getOperand(1);
+KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
+KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
+Known = KnownBits::umin(Known0, Known1);
+if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
+  return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
+if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
+  return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
+break;
+  }
+  case ISD::UMAX: {
+// Check if one arg is always greater than (or equal) to the other arg.
+SDValue Op0 = Op.getOperand(0);
+SDValue Op1 = Op.getOperand(1);
+KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
+KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
+Known = KnownBits::umax(Known0, Known1);
+if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
+  return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
+if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
+  return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
+break;
+  }
   case ISD::BITREVERSE: {
 SDValue Src = Op.getOperand(0);
 APInt DemandedSrcBits = DemandedBits.reverseBits();

diff  --git a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll 
b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
index b4cd36daad65..f0604c7fe782 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
@@ -18,7 +18,7 @@ define amdgpu_kernel void @test(i64 addrspace(1)* %out) {
 ; CHECK-NEXT:2(2.802597e-45), 0(0.00e+00)
 ; CHECK-NEXT: MOV * T0.W, KC0[2].Y,
 ; CHECK-NEXT:ALU clause starting at 11:
-; CHECK-NEXT: MAX_UINT T0.X, T0.X, literal.x,
+; CHECK-NEXT: MOV T0.X, literal.x,
 ; CHECK-NEXT: MOV T0.Y, 0.0,
 ; CHECK-NEXT: LSHR * T1.X, T0.W, literal.y,
 ; CHECK-NEXT:4(5.605194e-45), 2(2.802597e-45)

diff  --git a/llvm/test/CodeGen/X86/combine-umin.ll 
b/llvm/test/CodeGen/X86/combine-umin.ll
index b22c45bbce45..1be72ad66799 100644
--- a/llvm/test/CodeGen/X86/combine-umin.ll
+++ b/llvm/test/CodeGen/X86/combine-umin.ll
@@ -10,14 +10,9 @@
 define i8 @test_demandedbits_umin_ult(i8 %a0, i8 %a1) {
 ; CHECK-LABEL: test_demandedbits_umin_ult:
 ; CHECK:   # %bb.0:
-; CHECK-NEXT:orb $12, %dil
-; CHECK-NEXT:orb $4, %sil
-; CHECK-NEXT:andb $13, %dil
-; CHECK-NEXT:andb $12, %sil
-; CHECK-NEXT:movzbl %dil, %ecx
-; CHECK-NEXT:movzbl %sil, %eax
-; CHECK-NEXT:cmpb %al, %cl
-; CHECK-NEXT:cmovbl %ecx, %eax
+; CHECK-NEXT:movl %esi, %eax
+; CHECK-NEXT:orb $4, %al
+; CHECK-NEXT:andb $12, %al
 ; CHECK-NEXT:# kill: def $al killed $al killed $eax
 ; CHECK-NEXT:retq
   %lhs0 = and i8 %a0, 13  ; b1101

diff  --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll 
b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 617d5d7876bd..9801cb4018b9 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll

[llvm-branch-commits] [llvm] ce06475 - [X86][AVX] IsElementEquivalent - add matchShuffleWithUNPCK + VBROADCAST/VBROADCAST_LOAD handling

2021-01-18 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-18T15:55:00Z
New Revision: ce06475da94f1040d17d46d471dd48478576a76f

URL: 
https://github.com/llvm/llvm-project/commit/ce06475da94f1040d17d46d471dd48478576a76f
DIFF: 
https://github.com/llvm/llvm-project/commit/ce06475da94f1040d17d46d471dd48478576a76f.diff

LOG: [X86][AVX] IsElementEquivalent - add matchShuffleWithUNPCK + 
VBROADCAST/VBROADCAST_LOAD handling

Specify LHS/RHS operands in matchShuffleWithUNPCK's calls to 
isTargetShuffleEquivalent, and handle VBROADCAST/VBROADCAST_LOAD matching in 
IsElementEquivalent
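
The broadcast case reduces to "all lanes hold the same value". A minimal
sketch under that assumption, where Broadcast is a stand-in type rather than
the X86 node:

  #include <cassert>

  struct Broadcast {
    int Scalar;
    static constexpr int NumElts = 4; // illustrative width
  };

  static bool isElementEquivalent(const Broadcast &Op,
                                  const Broadcast &Expected, int Idx,
                                  int ExpectedIdx, int MaskSize) {
    (void)Idx; (void)ExpectedIdx; // every lane of a broadcast is equal
    return &Op == &Expected && Broadcast::NumElts == MaskSize;
  }

  int main() {
    Broadcast B{42};
    assert(isElementEquivalent(B, B, 0, 3, 4)); // lane 0 matches lane 3
  }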

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avg.ll
llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 78a5d4a6dfbf8..60a2fd233d5cb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -10960,6 +10960,11 @@ static bool IsElementEquivalent(int MaskSize, SDValue 
Op, SDValue ExpectedOp,
 MaskSize == (int)ExpectedOp.getNumOperands())
   return Op.getOperand(Idx) == ExpectedOp.getOperand(ExpectedIdx);
 break;
+  case X86ISD::VBROADCAST:
+  case X86ISD::VBROADCAST_LOAD:
+// TODO: Handle MaskSize != Op.getValueType().getVectorNumElements()?
+return (Op == ExpectedOp &&
+Op.getValueType().getVectorNumElements() == MaskSize);
   case X86ISD::HADD:
   case X86ISD::HSUB:
   case X86ISD::FHADD:
@@ -11321,7 +11326,8 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, 
SDValue &V2,
   // Attempt to match the target mask against the unpack lo/hi mask patterns.
   SmallVector<int, 64> Unpckl, Unpckh;
   createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);
-  if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) {
+  if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, V1,
+(IsUnary ? V1 : V2))) {
 UnpackOpcode = X86ISD::UNPCKL;
 V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
 V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
@@ -11329,7 +11335,8 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, 
SDValue &V2,
   }
 
   createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);
-  if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) {
+  if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, V1,
+(IsUnary ? V1 : V2))) {
 UnpackOpcode = X86ISD::UNPCKH;
 V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
 V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);

diff  --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index e2139fd20d32c..23fa7e033db9e 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -2245,7 +2245,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, 
<16 x i8>* %b) nounwind
 ; AVX2-NEXT:vpunpcklbw {{.*#+}} xmm9 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; AVX2-NEXT:vpbroadcastw %xmm8, %xmm8
 ; AVX2-NEXT:vpbroadcastw %xmm9, %xmm0
-; AVX2-NEXT:vpblendw {{.*#+}} xmm8 = xmm0[0,1,2,3,4,5,6],xmm8[7]
+; AVX2-NEXT:vpunpcklwd {{.*#+}} xmm8 = 
xmm0[0],xmm8[0],xmm0[1],xmm8[1],xmm0[2],xmm8[2],xmm0[3],xmm8[3]
 ; AVX2-NEXT:vpunpcklbw {{.*#+}} xmm0 = 
xmm13[0],xmm12[0],xmm13[1],xmm12[1],xmm13[2],xmm12[2],xmm13[3],xmm12[3],xmm13[4],xmm12[4],xmm13[5],xmm12[5],xmm13[6],xmm12[6],xmm13[7],xmm12[7]
 ; AVX2-NEXT:vpunpcklbw {{.*#+}} xmm9 = 
xmm15[0],xmm14[0],xmm15[1],xmm14[1],xmm15[2],xmm14[2],xmm15[3],xmm14[3],xmm15[4],xmm14[4],xmm15[5],xmm14[5],xmm15[6],xmm14[6],xmm15[7],xmm14[7]
 ; AVX2-NEXT:vpbroadcastw %xmm0, %xmm0

diff  --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll 
b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
index 29ea4d3bf55d3..4c86242a1d302 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
@@ -4230,11 +4230,10 @@ define <4 x double> 
@test_masked_z_8xdouble_to_4xdouble_perm_mem_mask6(<8 x doub
 define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask7(<8 x 
double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
 ; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mem_mask7:
 ; CHECK:   # %bb.0:
-; CHECK-NEXT:vbroadcastsd 40(%rdi), %ymm2
-; CHECK-NEXT:vblendpd $5, (%rdi), %ymm2, %ymm2 # ymm2 = 
mem[0],ymm2[1],mem[2],ymm2[3]
+; CHECK-NEXT:vmovapd (%rdi), %ymm2
 ; CHECK-NEXT:vxorpd %xmm3, %xmm3, %xmm3
 ; CHECK-NEXT:vcmpeqpd %ymm3, %ymm1, %k1
-; CHECK-NEXT:vmovapd %ymm2, %ymm0 {%k1}
+; CHECK-NEXT:vunpcklpd 40(%rdi){1to4}, %ymm2, %ymm0 {%k1}
 ; CHECK-NEXT:retq
   %vec = load <8 x double>, <8 x double>* %vp
   %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> 
@@ -4246,11 +4245,10 @@ d

[llvm-branch-commits] [llvm] 5626adc - [X86][SSE] combineVectorSignBitsTruncation - fold trunc(srl(x, c)) -> packss(sra(x, c))

2021-01-19 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-19T11:04:13Z
New Revision: 5626adcd6bbaadd12fe5bf15cd2d39ece2e5c406

URL: 
https://github.com/llvm/llvm-project/commit/5626adcd6bbaadd12fe5bf15cd2d39ece2e5c406
DIFF: 
https://github.com/llvm/llvm-project/commit/5626adcd6bbaadd12fe5bf15cd2d39ece2e5c406.diff

LOG: [X86][SSE] combineVectorSignBitsTruncation - fold trunc(srl(x,c)) -> 
packss(sra(x,c))

If a srl doesn't introduce any sign bits into the truncated result, then 
replace with a sra to let us use a PACKSS truncation - fixes a regression 
noticed in D56387 on pre-SSE41 targets that don't have PACKUSDW.
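
A scalar model of why the rewrite holds: truncating (x >> 16) to i16 keeps
bits [31:16] of x for either shift kind, but only the arithmetic shift leaves
a sign-extended lane that PACKSS's signed saturation truncates exactly. The
values below are arbitrary examples:

  #include <cassert>
  #include <cstdint>

  static int16_t packss_lane(int32_t v) { // one lane of PACKSSDW
    if (v > INT16_MAX) return INT16_MAX;
    if (v < INT16_MIN) return INT16_MIN;
    return (int16_t)v;
  }

  int main() {
    for (int32_t x : {0x12345678, (int32_t)0xFEDCBA98}) {
      int16_t want = (int16_t)((uint32_t)x >> 16); // trunc(srl(x, 16))
      int16_t got = packss_lane(x >> 16);          // packss(sra(x, 16))
      assert(got == want);
      // Feeding the srl result into packss_lane instead would saturate
      // 0x0000FEDC to 0x7FFF, which is why the srl must become a sra.
    }
  }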

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-trunc.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 97fcef0b92fa..0ee671710219 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -46071,9 +46071,23 @@ static SDValue combineVectorSignBitsTruncation(SDNode 
*N, const SDLoc &DL,
   if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits())
 return SDValue();
 
-  if (NumSignBits > (InSVT.getSizeInBits() - NumPackedSignBits))
+  unsigned MinSignBits = InSVT.getSizeInBits() - NumPackedSignBits;
+  if (NumSignBits > MinSignBits)
 return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget);
 
+  // If we have a srl that only generates signbits that we will discard in
+  // the truncation then we can use PACKSS by converting the srl to a sra.
+  // SimplifyDemandedBits often relaxes sra to srl so we need to reverse it.
+  if (In.getOpcode() == ISD::SRL && N->isOnlyUserOf(In.getNode()))
+if (const APInt *ShAmt = DAG.getValidShiftAmountConstant(
+In, APInt::getAllOnesValue(VT.getVectorNumElements()))) {
+  if (*ShAmt == MinSignBits) {
+SDValue NewIn = DAG.getNode(ISD::SRA, DL, InVT, In->ops());
+return truncateVectorWithPACK(X86ISD::PACKSS, VT, NewIn, DL, DAG,
+  Subtarget);
+  }
+}
+
   return SDValue();
 }
 

diff  --git a/llvm/test/CodeGen/X86/vector-trunc.ll 
b/llvm/test/CodeGen/X86/vector-trunc.ll
index f35e315bbb0b..1d8d6f66521e 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -452,10 +452,9 @@ define <8 x i16> @trunc8i32_8i16_lshr(<8 x i32> %a) {
 ;
 ; SSSE3-LABEL: trunc8i32_8i16_lshr:
 ; SSSE3:   # %bb.0: # %entry
-; SSSE3-NEXT:movdqa {{.*#+}} xmm2 = 
[2,3,6,7,10,11,14,15,10,11,14,15,14,15,128,128]
-; SSSE3-NEXT:pshufb %xmm2, %xmm1
-; SSSE3-NEXT:pshufb %xmm2, %xmm0
-; SSSE3-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT:psrad $16, %xmm1
+; SSSE3-NEXT:psrad $16, %xmm0
+; SSSE3-NEXT:packssdw %xmm1, %xmm0
 ; SSSE3-NEXT:retq
 ;
 ; SSE41-LABEL: trunc8i32_8i16_lshr:





[llvm-branch-commits] [llvm] 2988f94 - [X86] Regenerate fmin/fmax reduction tests

2021-01-19 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-19T14:28:44Z
New Revision: 2988f940d861f0fa76bc5b749772f2b9239d5a1b

URL: 
https://github.com/llvm/llvm-project/commit/2988f940d861f0fa76bc5b749772f2b9239d5a1b
DIFF: 
https://github.com/llvm/llvm-project/commit/2988f940d861f0fa76bc5b749772f2b9239d5a1b.diff

LOG: [X86] Regenerate fmin/fmax reduction tests

Add missing check-prefixes + v1f32 tests

Added: 


Modified: 
llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
llvm/test/CodeGen/X86/vector-reduce-fmax.ll
llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
llvm/test/CodeGen/X86/vector-reduce-fmin.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll 
b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
index 021c48deece7..167248181ecb 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -1,15 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s 
--check-prefixes=SSE,SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s 
--check-prefixes=SSE,SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s 
--check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s 
--check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | 
FileCheck %s --check-prefix=AVX512
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s 
--check-prefixes=ALL,SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s 
--check-prefixes=ALL,SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s 
--check-prefixes=ALL,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s 
--check-prefixes=ALL,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | 
FileCheck %s --check-prefixes=ALL,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX512
 
 ;
 ; vXf32
 ;
 
+define float @test_v1f32(<1 x float> %a0) {
+; ALL-LABEL: test_v1f32:
+; ALL:   # %bb.0:
+; ALL-NEXT:retq
+  %1 = call nnan float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a0)
+  ret float %1
+}
+
 define float @test_v2f32(<2 x float> %a0) {
 ; SSE2-LABEL: test_v2f32:
 ; SSE2:   # %bb.0:
@@ -458,10 +466,10 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
 ; SSE-NEXT:subq $16, %rsp
 ; SSE-NEXT:movl %edi, %ebx
 ; SSE-NEXT:movzwl %si, %edi
-; SSE-NEXT:callq __gnu_h2f_ieee
+; SSE-NEXT:callq __gnu_h2f_ieee@PLT
 ; SSE-NEXT:movaps %xmm0, (%rsp) # 16-byte Spill
 ; SSE-NEXT:movzwl %bx, %edi
-; SSE-NEXT:callq __gnu_h2f_ieee
+; SSE-NEXT:callq __gnu_h2f_ieee@PLT
 ; SSE-NEXT:movaps %xmm0, %xmm1
 ; SSE-NEXT:cmpunordss %xmm0, %xmm1
 ; SSE-NEXT:movaps %xmm1, %xmm2
@@ -471,7 +479,7 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
 ; SSE-NEXT:andnps %xmm3, %xmm1
 ; SSE-NEXT:orps %xmm2, %xmm1
 ; SSE-NEXT:movaps %xmm1, %xmm0
-; SSE-NEXT:callq __gnu_f2h_ieee
+; SSE-NEXT:callq __gnu_f2h_ieee@PLT
 ; SSE-NEXT:addq $16, %rsp
 ; SSE-NEXT:popq %rbx
 ; SSE-NEXT:retq
@@ -482,16 +490,16 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
 ; AVX-NEXT:subq $16, %rsp
 ; AVX-NEXT:movl %esi, %ebx
 ; AVX-NEXT:movzwl %di, %edi
-; AVX-NEXT:callq __gnu_h2f_ieee
+; AVX-NEXT:callq __gnu_h2f_ieee@PLT
 ; AVX-NEXT:vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
 ; AVX-NEXT:movzwl %bx, %edi
-; AVX-NEXT:callq __gnu_h2f_ieee
+; AVX-NEXT:callq __gnu_h2f_ieee@PLT
 ; AVX-NEXT:vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload
 ; AVX-NEXT:# xmm2 = mem[0],zero,zero,zero
 ; AVX-NEXT:vmaxss %xmm2, %xmm0, %xmm1
 ; AVX-NEXT:vcmpunordss %xmm2, %xmm2, %xmm2
 ; AVX-NEXT:vblendvps %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT:callq __gnu_f2h_ieee
+; AVX-NEXT:callq __gnu_f2h_ieee@PLT
 ; AVX-NEXT:addq $16, %rsp
 ; AVX-NEXT:popq %rbx
 ; AVX-NEXT:retq
@@ -514,6 +522,7 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
   %1 = call nnan half @llvm.vector.reduce.fmax.v2f16(<2 x half> %a0)
   ret half %1
 }
+declare float @llvm.vector.reduce.fmax.v1f32(<1 x float>)
 declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
 declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
 declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)

diff  --git a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll 
b/llvm/test/CodeGen/X86/vector-reduce-fmax.ll
index af8141a119ab..d7d754ac5548 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll

[llvm-branch-commits] [llvm] 19d0284 - [X86][AVX] Fold extract_subvector(VSRLI/VSHLI(x, 32)) -> VSRLI/VSHLI(extract_subvector(x), 32)

2021-01-20 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-20T14:34:54Z
New Revision: 19d02842ee56089b9208875ce4582e113e08fb6d

URL: 
https://github.com/llvm/llvm-project/commit/19d02842ee56089b9208875ce4582e113e08fb6d
DIFF: 
https://github.com/llvm/llvm-project/commit/19d02842ee56089b9208875ce4582e113e08fb6d.diff

LOG: [X86][AVX] Fold extract_subvector(VSRLI/VSHLI(x,32)) -> 
VSRLI/VSHLI(extract_subvector(x),32)

As discussed on D56387, if we're shifting to extract the upper/lower half of a 
vXi64 vector then we're actually better off performing this at the subvector 
level as it's very likely to fold into something.

combineConcatVectorOps can perform this in reverse if necessary.
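
The fold works because lane-wise shifts commute with subvector extraction. A
small model on v4i64 (lane values arbitrary):

  #include <array>
  #include <cassert>
  #include <cstdint>

  using V4 = std::array<uint64_t, 4>;
  using V2 = std::array<uint64_t, 2>;

  static V2 extractHigh(const V4 &V) { return {V[2], V[3]}; }
  static V4 srli32(V4 V) { for (auto &L : V) L >>= 32; return V; }
  static V2 srli32(V2 V) { for (auto &L : V) L >>= 32; return V; }

  int main() {
    V4 X{0x1111222233334444ULL, 0x5555666677778888ULL,
         0x99990000AAAA0000ULL, 0xBBBB0000CCCC0000ULL};
    // extract_subvector(VSRLI(X, 32), 2) == VSRLI(extract_subvector(X, 2), 32)
    assert(extractHigh(srli32(X)) == srli32(extractHigh(X)));
  }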

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-sra.ll
llvm/test/CodeGen/X86/pmul.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0ee671710219..0b52b2021c73 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49799,8 +49799,8 @@ static SDValue combineExtractSubvector(SDNode *N, 
SelectionDAG &DAG,
 
   // If we're extracting the lowest subvector and we're the only user,
   // we may be able to perform this with a smaller vector width.
+  unsigned InOpcode = InVec.getOpcode();
   if (IdxVal == 0 && InVec.hasOneUse()) {
-unsigned InOpcode = InVec.getOpcode();
 if (VT == MVT::v2f64 && InVecVT == MVT::v4f64) {
   // v2f64 CVTDQ2PD(v4i32).
   if (InOpcode == ISD::SINT_TO_FP &&
@@ -49853,6 +49853,17 @@ static SDValue combineExtractSubvector(SDNode *N, 
SelectionDAG &DAG,
 }
   }
 
+  // Always split vXi64 logical shifts where we're extracting the upper 32-bits
+  // as this is very likely to fold into a shuffle/truncation.
+  if ((InOpcode == X86ISD::VSHLI || InOpcode == X86ISD::VSRLI) &&
+  InVecVT.getScalarSizeInBits() == 64 &&
+  InVec.getConstantOperandAPInt(1) == 32) {
+SDLoc DL(N);
+SDValue Ext =
+extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits);
+return DAG.getNode(InOpcode, DL, VT, Ext, InVec.getOperand(1));
+  }
+
   return SDValue();
 }
 

diff  --git a/llvm/test/CodeGen/X86/combine-sra.ll 
b/llvm/test/CodeGen/X86/combine-sra.ll
index 28a73cdb6a41..453a61b8565e 100644
--- a/llvm/test/CodeGen/X86/combine-sra.ll
+++ b/llvm/test/CodeGen/X86/combine-sra.ll
@@ -207,9 +207,8 @@ define <4 x i32> @combine_vec_ashr_trunc_lshr(<4 x i64> %x) 
{
 ;
 ; AVX2-SLOW-LABEL: combine_vec_ashr_trunc_lshr:
 ; AVX2-SLOW:   # %bb.0:
-; AVX2-SLOW-NEXT:vpsrlq $32, %ymm0, %ymm0
-; AVX2-SLOW-NEXT:vextracti128 $1, %ymm0, %xmm1
-; AVX2-SLOW-NEXT:vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX2-SLOW-NEXT:vextractf128 $1, %ymm0, %xmm1
+; AVX2-SLOW-NEXT:vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
 ; AVX2-SLOW-NEXT:vpsravd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-SLOW-NEXT:vzeroupper
 ; AVX2-SLOW-NEXT:retq

diff  --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll
index db6009f273d2..56476eea323e 100644
--- a/llvm/test/CodeGen/X86/pmul.ll
+++ b/llvm/test/CodeGen/X86/pmul.ll
@@ -1150,9 +1150,8 @@ define <4 x i32> @mul_v4i64_zero_lower(<4 x i32> %val1, 
<4 x i64> %val2) {
 ; AVX-NEXT:vpmovzxdq {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; AVX-NEXT:vpsrlq $32, %ymm1, %ymm1
 ; AVX-NEXT:vpmuludq %ymm1, %ymm0, %ymm0
-; AVX-NEXT:vpsllq $32, %ymm0, %ymm0
 ; AVX-NEXT:vextracti128 $1, %ymm0, %xmm1
-; AVX-NEXT:vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; AVX-NEXT:vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
 ; AVX-NEXT:vzeroupper
 ; AVX-NEXT:retq
 entry:

diff  --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll 
b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
index a274baefc1ef..f0cb46e63d8f 100644
--- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
@@ -834,19 +834,20 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
 ;
 ; AVX2-64-LABEL: uitofp_v4i64_v4f64:
 ; AVX2-64:   # %bb.0:
-; AVX2-64-NEXT:vpsrlq $32, %ymm0, %ymm1
-; AVX2-64-NEXT:vextracti128 $1, %ymm1, %xmm2
+; AVX2-64-NEXT:vextracti128 $1, %ymm0, %xmm1
+; AVX2-64-NEXT:vpsrlq $32, %xmm1, %xmm1
+; AVX2-64-NEXT:vpextrq $1, %xmm1, %rax
+; AVX2-64-NEXT:vcvtsi2sd %rax, %xmm2, %xmm2
+; AVX2-64-NEXT:vmovq %xmm1, %rax
+; AVX2-64-NEXT:vcvtsi2sd %rax, %xmm3, %xmm1
+; AVX2-64-NEXT:vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-64-NEXT:vpsrlq $32, %xmm0, %xmm2
 ; AVX2-64-NEXT:vpextrq $1, %xmm2, %rax
 ; AVX2-64-NEXT:vcvtsi2sd %rax, %xmm3, %xmm3
 ; AVX2-64-NEXT:vmovq %xmm2, %rax
 ; AVX2-64-NEXT:vcvtsi2sd %rax, %xmm4, %xmm2
 ; AVX2-64-NEXT:vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX2-64-NEXT:vpextrq $1, %xmm1, %rax

[llvm-branch-commits] [llvm] cad4275 - [DAGCombiner] Enable SimplifyDemandedBits vector support for TRUNCATE

2021-01-20 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-20T15:39:58Z
New Revision: cad4275d697c601761e0819863f487def73c67f8

URL: 
https://github.com/llvm/llvm-project/commit/cad4275d697c601761e0819863f487def73c67f8
DIFF: 
https://github.com/llvm/llvm-project/commit/cad4275d697c601761e0819863f487def73c67f8.diff

LOG: [DAGCombiner] Enable SimplifyDemandedBits vector support for TRUNCATE

Add DemandedElts support inside the TRUNCATE analysis.

Differential Revision: https://reviews.llvm.org/D56387
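
A scalar sketch of the demanded-bits step for TRUNCATE: when only some bits of
the narrow result are used, only the zero-extension of that mask matters in
the wider source, and the same reasoning now applies per vector element. The
widths below are illustrative:

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint16_t DemandedBits = 0x00FF;    // user reads only the low byte
    const uint32_t TruncMask = DemandedBits; // zext to the i32 source width
    uint32_t Src = 0xDEADBEEF;
    uint32_t Simplified = Src & TruncMask; // any source agreeing on those bits
    // The truncated uses cannot tell the two sources apart.
    assert(((uint16_t)Src & DemandedBits) ==
           ((uint16_t)Simplified & DemandedBits));
  }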

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/aarch64-smull.ll
llvm/test/CodeGen/AArch64/lowerMUL-newload.ll
llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
llvm/test/CodeGen/ARM/lowerMUL-newload.ll
llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
llvm/test/CodeGen/Thumb2/mve-vmulh.ll
llvm/test/CodeGen/X86/combine-sra.ll
llvm/test/CodeGen/X86/known-signbits-vector.ll
llvm/test/CodeGen/X86/min-legal-vector-width.ll
llvm/test/CodeGen/X86/vector-trunc.ll

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f7c6a77b9a03..680662536161 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11952,8 +11952,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   }
 
   // Simplify the operands using demanded-bits information.
-  if (!VT.isVector() &&
-  SimplifyDemandedBits(SDValue(N, 0)))
+  if (SimplifyDemandedBits(SDValue(N, 0)))
 return SDValue(N, 0);
 
   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)

diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b19033e3e427..5613db8f724d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1986,7 +1986,8 @@ bool TargetLowering::SimplifyDemandedBits(
 // zero/one bits live out.
 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
-if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
+if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
+ Depth + 1))
   return true;
 Known = Known.trunc(BitWidth);
 
@@ -2009,9 +2010,9 @@ bool TargetLowering::SimplifyDemandedBits(
   // undesirable.
   break;
 
-SDValue ShAmt = Src.getOperand(1);
-auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
-if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
+const APInt *ShAmtC =
+TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
+if (!ShAmtC)
   break;
 uint64_t ShVal = ShAmtC->getZExtValue();
 
@@ -2023,6 +2024,7 @@ bool TargetLowering::SimplifyDemandedBits(
 if (!(HighBits & DemandedBits)) {
   // None of the shifted in bits are needed.  Add a truncate of the
   // shift input, then shift it.
+  SDValue ShAmt = Src.getOperand(1);
   if (TLO.LegalTypes())
 ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
   SDValue NewTrunc =

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c7bcd4de046c..6dd081dc3cb7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3399,6 +3399,7 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, 
SelectionDAG &DAG) {
 
 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
   return N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ANY_EXTEND ||
  isExtendedBUILD_VECTOR(N, DAG, true);
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll 
b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 0a692192ec8b..0c232a4bf5a8 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -96,7 +96,7 @@ define <8 x i16> @amull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* 
%B) nounwind {
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:ldr d0, [x0]
 ; CHECK-NEXT:ldr d1, [x1]
-; CHECK-NEXT:umull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT:smull v0.8h, v0.8b, v1.8b
 ; CHECK-NEXT:bic v0.8h, #255, lsl #8
 ; CHECK-NEXT:ret
   %tmp1 = load <8 x i8>, <8 x i8>* %A
@@ -113,7 +113,7 @@ define <4 x i32> @amull_v4i16_v4i32(<4 x i16>* %A, <4 x 
i16>* %B) nounwind {
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:ldr d0, [x0]
 ; CHECK-NEXT:ldr d1, [x1]
-; CHECK-NEXT:umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:smull v0.4s, v0.4h, v1.4h
 ; CHECK-NEXT:movi v1.2d, #0x00
 ; CHECK-NEXT:and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:ret

[llvm-branch-commits] [llvm] b8b5e87 - [X86][AVX] Handle vperm2x128 shuffling of a subvector splat.

2021-01-20 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-20T18:16:33Z
New Revision: b8b5e87e6b8102d77e4e6beccf4e0f0237acc897

URL: 
https://github.com/llvm/llvm-project/commit/b8b5e87e6b8102d77e4e6beccf4e0f0237acc897
DIFF: 
https://github.com/llvm/llvm-project/commit/b8b5e87e6b8102d77e4e6beccf4e0f0237acc897.diff

LOG: [X86][AVX] Handle vperm2x128 shuffling of a subvector splat.

We already handle "vperm2x128 (ins ?, X, C1), (ins ?, X, C1), 0x31" for 
shuffling of the upper subvectors, but we weren't dealing with the case when we 
were splatting the upper subvector from a single source.
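
A model of the vperm2x128 selector semantics showing why the tweak is sound:
with the second source undefined, imm 0x11 on (A, undef) picks A's upper half
twice, which is exactly imm 0x31 on (A, A). The 2-element "halves" are
illustrative:

  #include <array>
  #include <cassert>

  using Half = std::array<int, 2>;
  using Vec = std::array<Half, 2>;

  // Each selector nibble picks a 128-bit half from the concat (A, B).
  static Vec vperm2x128(const Vec &A, const Vec &B, unsigned Imm) {
    auto pick = [&](unsigned Sel) { return Sel < 2 ? A[Sel] : B[Sel - 2]; };
    return {pick(Imm & 0x3), pick((Imm >> 4) & 0x3)};
  }

  int main() {
    Vec A{{{1, 2}, {3, 4}}};
    // Splat of A's upper half: 0x11 behaves like 0x31 once B is tied to A.
    assert(vperm2x128(A, A, 0x11) == vperm2x128(A, A, 0x31));
  }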

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx-vperm2x128.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0b52b2021c73..852078a299b9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37324,6 +37324,14 @@ static SDValue combineTargetShuffle(SDValue N, 
SelectionDAG &DAG,
 SDValue Ins1 = peekThroughBitcasts(N.getOperand(1));
 unsigned Imm = N.getConstantOperandVal(2);
 
+// Handle subvector splat by tweaking values to match binary concat.
+// vperm2x128 (ins ?, X, C1), undef, 0x11 ->
+// vperm2x128 (ins ?, X, C1), (ins ?, X, C1), 0x31 -> concat X, X
+if (Imm == 0x11 && Ins1.isUndef()) {
+  Imm = 0x31;
+  Ins1 = Ins0;
+}
+
 if (!(Imm == 0x31 &&
   Ins0.getOpcode() == ISD::INSERT_SUBVECTOR &&
   Ins1.getOpcode() == ISD::INSERT_SUBVECTOR &&

diff  --git a/llvm/test/CodeGen/X86/avx-vperm2x128.ll 
b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
index a519f55aaafe..bfab2f186bf5 100644
--- a/llvm/test/CodeGen/X86/avx-vperm2x128.ll
+++ b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
@@ -130,7 +130,6 @@ define <32 x i8> @shuffle_v32i8_2323_domain(<32 x i8> %a, 
<32 x i8> %b) nounwind
 ; AVX1-NEXT:vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:vpsubb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT:vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
 ; AVX1-NEXT:retq
 ;
 ; AVX2-LABEL: shuffle_v32i8_2323_domain:

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll 
b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
index f1af4faf67e2..5f2a3cd72b71 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -3098,14 +3098,13 @@ entry:
 define <8 x i32> @add_v8i32_02468ACE_13579BDF(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-LABEL: add_v8i32_02468ACE_13579BDF:
 ; AVX1:   # %bb.0: # %entry
-; AVX1-NEXT:vphaddd %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:vphaddd %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:vinsertf128 $1, %xmm2, %ymm2, %ymm2
-; AVX1-NEXT:vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:vphaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT:vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
-; AVX1-NEXT:vshufpd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[3],ymm0[3]
+; AVX1-NEXT:vshufpd {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[3],ymm2[3]
 ; AVX1-NEXT:retq
 ;
 ; AVX2OR512VL-LABEL: add_v8i32_02468ACE_13579BDF:
@@ -3123,14 +3122,13 @@ entry:
 define <8 x i32> @add_v8i32_8ACE0246_9BDF1357(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-LABEL: add_v8i32_8ACE0246_9BDF1357:
 ; AVX1:   # %bb.0: # %entry
-; AVX1-NEXT:vphaddd %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:vphaddd %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:vinsertf128 $1, %xmm2, %ymm2, %ymm2
-; AVX1-NEXT:vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:vphaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT:vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
-; AVX1-NEXT:vshufpd {{.*#+}} ymm0 = ymm2[1],ymm0[1],ymm2[2],ymm0[2]
+; AVX1-NEXT:vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[2],ymm2[2]
 ; AVX1-NEXT:retq
 ;
 ; AVX2OR512VL-LABEL: add_v8i32_8ACE0246_9BDF1357:





[llvm-branch-commits] [llvm] 86021d9 - [X86] Avoid a std::string copy by replacing auto with const auto&. NFC.

2021-01-21 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-21T11:04:07Z
New Revision: 86021d98d3f8b27f7956cee04f11505c2e836e81

URL: 
https://github.com/llvm/llvm-project/commit/86021d98d3f8b27f7956cee04f11505c2e836e81
DIFF: 
https://github.com/llvm/llvm-project/commit/86021d98d3f8b27f7956cee04f11505c2e836e81.diff

LOG: [X86] Avoid a std::string copy by replacing auto with const auto&. NFC.

Fixes msvc analyzer warning.
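
A minimal illustration of the difference; Options and getGuard are
hypothetical stand-ins for the TargetOptions member the patch touches:

  #include <string>

  struct Options { std::string StackProtectorGuardReg = "fs"; };

  static const std::string &getGuard(const Options &O) {
    return O.StackProtectorGuardReg;
  }

  int main() {
    Options O;
    auto Copy = getGuard(O);       // deduces std::string: allocates a copy
    const auto &Ref = getGuard(O); // binds to the existing string: no copy
    return Copy == Ref ? 0 : 1;
  }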

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7cd17f109935..c5cc23f6236e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2516,11 +2516,11 @@ Value *X86TargetLowering::getIRStackGuard(IRBuilder<> 
&IRB) const {
   if (Offset == (unsigned)-1)
 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
 
-  auto GuardReg = getTargetMachine().Options.StackProtectorGuardReg;
-if (GuardReg == "fs")
-  AddressSpace = X86AS::FS;
-else if (GuardReg == "gs")
-  AddressSpace = X86AS::GS;
+  const auto &GuardReg = getTargetMachine().Options.StackProtectorGuardReg;
+  if (GuardReg == "fs")
+AddressSpace = X86AS::FS;
+  else if (GuardReg == "gs")
+AddressSpace = X86AS::GS;
   return SegmentOffset(IRB, Offset, AddressSpace);
 }
   }





[llvm-branch-commits] [llvm] bc9ab9a - [DAG] CombineToPreIndexedLoadStore - use const APInt& for getAPIntValue(). NFCI.

2021-01-21 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-21T11:04:09Z
New Revision: bc9ab9a5cd6bafc5e1293f3d5d51638f8f5cd26c

URL: 
https://github.com/llvm/llvm-project/commit/bc9ab9a5cd6bafc5e1293f3d5d51638f8f5cd26c
DIFF: 
https://github.com/llvm/llvm-project/commit/bc9ab9a5cd6bafc5e1293f3d5d51638f8f5cd26c.diff

LOG: [DAG] CombineToPreIndexedLoadStore - use const APInt& for getAPIntValue(). 
NFCI.

Cleanup some code to use auto* properly from cast, and use const APInt& for 
getAPIntValue() to avoid an unnecessary copy.

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f7c6a77b9a03..067bc436acdd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14940,16 +14940,13 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode 
*N) {
 // Therefore, we have:
 //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
 
-ConstantSDNode *CN =
-  cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
-int X0, X1, Y0, Y1;
+auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
 const APInt &Offset0 = CN->getAPIntValue();
-APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
-
-X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
-Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
-X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
-Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
+const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
+int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
+int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
+int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
+int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
 
 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
 





[llvm-branch-commits] [llvm] 935bacd - [DAG] SimplifyDemandedBits - correctly adjust truncated shift amount type

2021-01-21 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-21T12:38:36Z
New Revision: 935bacd3a7244f04b7f39818e3fc589529474d13

URL: 
https://github.com/llvm/llvm-project/commit/935bacd3a7244f04b7f39818e3fc589529474d13
DIFF: 
https://github.com/llvm/llvm-project/commit/935bacd3a7244f04b7f39818e3fc589529474d13.diff

LOG: [DAG] SimplifyDemandedBits - correctly adjust truncated shift amount type

As noticed on D56387, for vectors we must always correctly adjust the shift 
amount type during truncation (not just after legalization). We were getting 
away with it as we currently only accepted scalars via the 
dyn_cast<ConstantSDNode>.

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b19033e3e427..cac4d8fff8bb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2023,12 +2023,12 @@ bool TargetLowering::SimplifyDemandedBits(
 if (!(HighBits & DemandedBits)) {
   // None of the shifted in bits are needed.  Add a truncate of the
   // shift input, then shift it.
-  if (TLO.LegalTypes())
-ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
+  SDValue NewShAmt = TLO.DAG.getConstant(
+  ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
   SDValue NewTrunc =
   TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
   return TLO.CombineTo(
-  Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt));
+  Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
 }
 break;
   }





[llvm-branch-commits] [llvm] 0ca81b9 - [X86][SSE] Add uitofp(trunc(and(lshr(x, c)))) vector test

2021-01-21 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-21T12:38:36Z
New Revision: 0ca81b90d19d395c4891b7507cec0f063dd26d22

URL: 
https://github.com/llvm/llvm-project/commit/0ca81b90d19d395c4891b7507cec0f063dd26d22
DIFF: 
https://github.com/llvm/llvm-project/commit/0ca81b90d19d395c4891b7507cec0f063dd26d22.diff

LOG: [X86][SSE] Add uitofp(trunc(and(lshr(x,c)))) vector test

Reduced from regression reported by @hans on D56387

Added: 


Modified: 
llvm/test/CodeGen/X86/uint_to_fp-3.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/uint_to_fp-3.ll 
b/llvm/test/CodeGen/X86/uint_to_fp-3.ll
index ca46b48b7731..5f1c3ec69a34 100644
--- a/llvm/test/CodeGen/X86/uint_to_fp-3.ll
+++ b/llvm/test/CodeGen/X86/uint_to_fp-3.ll
@@ -69,3 +69,64 @@ define <4 x double> @mask_ucvt_4i32_4f64(<4 x i32> %a) {
   %cvt = uitofp <4 x i32> %and to <4 x double>
   ret <4 x double> %cvt
 }
+
+; Regression noticed in D56387
+define <4 x float> @lshr_truncate_mask_ucvt_4i64_4f32(<4 x i64> *%p0) {
+; X32-SSE-LABEL: lshr_truncate_mask_ucvt_4i64_4f32:
+; X32-SSE:   # %bb.0:
+; X32-SSE-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT:movdqu (%eax), %xmm0
+; X32-SSE-NEXT:movdqu 16(%eax), %xmm1
+; X32-SSE-NEXT:psrlq $16, %xmm1
+; X32-SSE-NEXT:psrlq $16, %xmm0
+; X32-SSE-NEXT:shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; X32-SSE-NEXT:andps {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT:cvtdq2ps %xmm0, %xmm0
+; X32-SSE-NEXT:mulps {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT:retl
+;
+; X32-AVX-LABEL: lshr_truncate_mask_ucvt_4i64_4f32:
+; X32-AVX:   # %bb.0:
+; X32-AVX-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X32-AVX-NEXT:vmovdqu (%eax), %xmm0
+; X32-AVX-NEXT:vmovdqu 16(%eax), %xmm1
+; X32-AVX-NEXT:vpsrlq $16, %xmm1, %xmm1
+; X32-AVX-NEXT:vpsrlq $16, %xmm0, %xmm0
+; X32-AVX-NEXT:vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; X32-AVX-NEXT:vpxor %xmm1, %xmm1, %xmm1
+; X32-AVX-NEXT:vpblendw {{.*#+}} xmm0 = 
xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; X32-AVX-NEXT:vcvtdq2ps %xmm0, %xmm0
+; X32-AVX-NEXT:vmulps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-AVX-NEXT:retl
+;
+; X64-SSE-LABEL: lshr_truncate_mask_ucvt_4i64_4f32:
+; X64-SSE:   # %bb.0:
+; X64-SSE-NEXT:movdqu (%rdi), %xmm0
+; X64-SSE-NEXT:movdqu 16(%rdi), %xmm1
+; X64-SSE-NEXT:psrlq $16, %xmm1
+; X64-SSE-NEXT:psrlq $16, %xmm0
+; X64-SSE-NEXT:shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; X64-SSE-NEXT:andps {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT:cvtdq2ps %xmm0, %xmm0
+; X64-SSE-NEXT:mulps {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT:retq
+;
+; X64-AVX-LABEL: lshr_truncate_mask_ucvt_4i64_4f32:
+; X64-AVX:   # %bb.0:
+; X64-AVX-NEXT:vmovdqu (%rdi), %xmm0
+; X64-AVX-NEXT:vmovdqu 16(%rdi), %xmm1
+; X64-AVX-NEXT:vpsrlq $16, %xmm1, %xmm1
+; X64-AVX-NEXT:vpsrlq $16, %xmm0, %xmm0
+; X64-AVX-NEXT:vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; X64-AVX-NEXT:vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT:vpblendw {{.*#+}} xmm0 = 
xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; X64-AVX-NEXT:vcvtdq2ps %xmm0, %xmm0
+; X64-AVX-NEXT:vmulps {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:retq
+  %load = load <4 x i64>, <4 x i64>* %p0, align 2
+  %lshr = lshr <4 x i64> %load, <i64 16, i64 16, i64 16, i64 16>
+  %and = and <4 x i64> %lshr, <i64 65535, i64 65535, i64 65535, i64 65535>
+  %uitofp = uitofp <4 x i64> %and to <4 x float>
+  %fmul = fmul <4 x float> %uitofp, 
+  ret <4 x float> %fmul
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 69bc099 - [DAGCombiner] Enable SimplifyDemandedBits vector support for TRUNCATE (REAPPLIED).

2021-01-21 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-21T13:01:34Z
New Revision: 69bc0990a9181e6eb86228276d2f59435a7fae67

URL: 
https://github.com/llvm/llvm-project/commit/69bc0990a9181e6eb86228276d2f59435a7fae67
DIFF: 
https://github.com/llvm/llvm-project/commit/69bc0990a9181e6eb86228276d2f59435a7fae67.diff

LOG: [DAGCombiner] Enable SimplifyDemandedBits vector support for TRUNCATE 
(REAPPLIED).

Add DemandedElts support inside the TRUNCATE analysis.

REAPPLIED - this was reverted by @hans at rGa51226057fc3 due to an issue with 
vector shift amount types, which was fixed in rG935bacd3a724 and an additional 
test case added at rG0ca81b90d19d

Differential Revision: https://reviews.llvm.org/D56387
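
(Aside: a standalone scalar model of the fold this exposes to vectors -- my
own illustration, not from the commit; the same reasoning now applies per
vector element.)

#include <cassert>
#include <cstdint>

// If none of the bits shifted in from the wide half are demanded,
// trunc(srl(x, c)) and srl(trunc(x), c) agree on every demanded bit.
int main() {
  uint64_t X = 0x123456789abcdef0ULL;
  unsigned C = 16;                     // c < 32, the fold's precondition
  uint32_t Wide = (uint32_t)(X >> C);  // trunc(srl x, c)
  uint32_t Narrow = (uint32_t)X >> C;  // srl(trunc x, c)
  assert((Wide & 0xffffu) == (Narrow & 0xffffu)); // low (32 - c) bits match
  return 0;
}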

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/aarch64-smull.ll
llvm/test/CodeGen/AArch64/lowerMUL-newload.ll
llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
llvm/test/CodeGen/ARM/lowerMUL-newload.ll
llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
llvm/test/CodeGen/Thumb2/mve-vmulh.ll
llvm/test/CodeGen/X86/combine-sra.ll
llvm/test/CodeGen/X86/known-signbits-vector.ll
llvm/test/CodeGen/X86/min-legal-vector-width.ll
llvm/test/CodeGen/X86/uint_to_fp-3.ll
llvm/test/CodeGen/X86/vector-trunc.ll

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 067bc436acdd..32c7ac2f6cfb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11952,8 +11952,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   }
 
   // Simplify the operands using demanded-bits information.
-  if (!VT.isVector() &&
-  SimplifyDemandedBits(SDValue(N, 0)))
+  if (SimplifyDemandedBits(SDValue(N, 0)))
 return SDValue(N, 0);
 
   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)

diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cac4d8fff8bb..e2f42d050740 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1986,7 +1986,8 @@ bool TargetLowering::SimplifyDemandedBits(
 // zero/one bits live out.
 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
-if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
+if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
+ Depth + 1))
   return true;
 Known = Known.trunc(BitWidth);
 
@@ -2009,9 +2010,9 @@ bool TargetLowering::SimplifyDemandedBits(
   // undesirable.
   break;
 
-SDValue ShAmt = Src.getOperand(1);
-auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
-if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
+const APInt *ShAmtC =
+TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
+if (!ShAmtC)
   break;
 uint64_t ShVal = ShAmtC->getZExtValue();
 

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c7bcd4de046c..6dd081dc3cb7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3399,6 +3399,7 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, 
SelectionDAG &DAG) {
 
 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
   return N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ANY_EXTEND ||
  isExtendedBUILD_VECTOR(N, DAG, true);
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll 
b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 0a692192ec8b..0c232a4bf5a8 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -96,7 +96,7 @@ define <8 x i16> @amull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* 
%B) nounwind {
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:ldr d0, [x0]
 ; CHECK-NEXT:ldr d1, [x1]
-; CHECK-NEXT:umull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT:smull v0.8h, v0.8b, v1.8b
 ; CHECK-NEXT:bic v0.8h, #255, lsl #8
 ; CHECK-NEXT:ret
   %tmp1 = load <8 x i8>, <8 x i8>* %A
@@ -113,7 +113,7 @@ define <4 x i32> @amull_v4i16_v4i32(<4 x i16>* %A, <4 x 
i16>* %B) nounwind {
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:ldr d0, [x0]
 ; CHECK-NEXT:ldr d1, [x1]
-; CHECK-NEXT:umull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:smull v0.4s, v0.4h, v1.4h
; CHECK-NEXT:movi v1.2d, #0x00ffff0000ffff
 ; CHECK-NEXT:and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:ret
@@ -131,7 +131,7 @@ define <2 x i64> @amull_v2i32_v2i64(<2 x i32>* %A, <2 x 
i32>* %B) nounwind {
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:ldr d0, [x0]
 ; CHECK

[llvm-branch-commits] [llvm] 481659c - [X86][SSE] Add v16i8 02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu shuffle test

2021-01-22 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-22T10:05:22Z
New Revision: 481659c55c4ec1e133bec82a909e9e6baee70a28

URL: 
https://github.com/llvm/llvm-project/commit/481659c55c4ec1e133bec82a909e9e6baee70a28
DIFF: 
https://github.com/llvm/llvm-project/commit/481659c55c4ec1e133bec82a909e9e6baee70a28.diff

LOG: [X86][SSE] Add v16i8 02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu 
shuffle test

Added: 


Modified: 
llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll 
b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index ee3cf43e8f2f7..012b9f07dc6d0 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -761,6 +761,60 @@ define <16 x i8> 
@shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15(
   ret <16 x i8> %shuffle
 }
 
+define <16 x i8> 
@shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a, 
<16 x i8> %b)  {
+; SSE2-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; SSE2:   # %bb.0:
+; SSE2-NEXT:pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
+; SSE2-NEXT:punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT:psrlq $16, %xmm0
+; SSE2-NEXT:packuswb %xmm0, %xmm0
+; SSE2-NEXT:retq
+;
+; SSSE3-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; SSSE3:   # %bb.0:
+; SSSE3-NEXT:punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT:pshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:retq
+;
+; SSE41-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; SSE41:   # %bb.0:
+; SSE41-NEXT:punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE41-NEXT:pshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSE41-NEXT:retq
+;
+; AVX1-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; AVX1:   # %bb.0:
+; AVX1-NEXT:vpunpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:vpshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX1-NEXT:retq
+;
+; AVX2-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; AVX2:   # %bb.0:
+; AVX2-NEXT:vpunpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2-NEXT:vpshufb {{.*#+}} xmm0 = xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:retq
+;
+; AVX512VLBW-LABEL: 
shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; AVX512VLBW:   # %bb.0:
+; AVX512VLBW-NEXT:vpunpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX512VLBW-NEXT:vpshufb {{.*#+}} xmm0 = 
xmm0[4,9,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512VLBW-NEXT:retq
+;
+; AVX512VLVBMI-LABEL: 
shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; AVX512VLVBMI:   # %bb.0:
+; AVX512VLVBMI-NEXT:vpbroadcastw {{.*#+}} xmm2 = 
[5122,5122,5122,5122,5122,5122,5122,5122]
+; AVX512VLVBMI-NEXT:vpermt2b %xmm1, %xmm2, %xmm0
+; AVX512VLVBMI-NEXT:retq
+;
+; XOP-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; XOP:   # %bb.0:
+; XOP-NEXT:vpperm {{.*#+}} xmm0 = 
xmm0[2],xmm1[4],xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; XOP-NEXT:retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, 
i32 20, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef>
+  ret <16 x i8> %shuffle
+}
+
 ; PR39387
 define <16 x i8> @shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4(<16 x 
i8> %a, <16 x i8> %b) {
 ; SSE2-LABEL: shuffle_v16i8_5_6_7_8_9_10_27_28_29_30_31_0_1_2_3_4:



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 636b877 - [X86][SSE] Add PR48823 HSUB test case

2021-01-22 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-22T10:05:22Z
New Revision: 636b87785c1de64134254b688d30ab1248b16ed2

URL: 
https://github.com/llvm/llvm-project/commit/636b87785c1de64134254b688d30ab1248b16ed2
DIFF: 
https://github.com/llvm/llvm-project/commit/636b87785c1de64134254b688d30ab1248b16ed2.diff

LOG: [X86][SSE] Add PR48823 HSUB test case

Added: 


Modified: 
llvm/test/CodeGen/X86/haddsub-3.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/haddsub-3.ll 
b/llvm/test/CodeGen/X86/haddsub-3.ll
index 05ab83f8604de..651ab4ef39355 100644
--- a/llvm/test/CodeGen/X86/haddsub-3.ll
+++ b/llvm/test/CodeGen/X86/haddsub-3.ll
@@ -156,3 +156,56 @@ define <4 x double> @PR41414(i64 %x, <4 x double> %y) {
   %t3 = fadd <4 x double> zeroinitializer, %t2
   ret <4 x double> %t3
 }
+
+define <4 x float> @PR48823(<4 x float> %0, <4 x float> %1) {
+; SSE2-LABEL: PR48823:
+; SSE2:   # %bb.0:
+; SSE2-NEXT:movaps %xmm0, %xmm2
+; SSE2-NEXT:shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
+; SSE2-NEXT:shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
+; SSE2-NEXT:subps %xmm2, %xmm0
+; SSE2-NEXT:retq
+;
+; SSSE3-SLOW-LABEL: PR48823:
+; SSSE3-SLOW:   # %bb.0:
+; SSSE3-SLOW-NEXT:movaps %xmm0, %xmm2
+; SSSE3-SLOW-NEXT:shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
+; SSSE3-SLOW-NEXT:shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
+; SSSE3-SLOW-NEXT:subps %xmm2, %xmm0
+; SSSE3-SLOW-NEXT:retq
+;
+; SSSE3-FAST-LABEL: PR48823:
+; SSSE3-FAST:   # %bb.0:
+; SSSE3-FAST-NEXT:movaps %xmm0, %xmm2
+; SSSE3-FAST-NEXT:shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
+; SSSE3-FAST-NEXT:shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
+; SSSE3-FAST-NEXT:subps %xmm2, %xmm0
+; SSSE3-FAST-NEXT:retq
+;
+; AVX1-SLOW-LABEL: PR48823:
+; AVX1-SLOW:   # %bb.0:
+; AVX1-SLOW-NEXT:vshufps {{.*#+}} xmm2 = xmm0[1,1],xmm1[2,3]
+; AVX1-SLOW-NEXT:vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
+; AVX1-SLOW-NEXT:vsubps %xmm2, %xmm0, %xmm0
+; AVX1-SLOW-NEXT:retq
+;
+; AVX1-FAST-LABEL: PR48823:
+; AVX1-FAST:   # %bb.0:
+; AVX1-FAST-NEXT:vshufps {{.*#+}} xmm2 = xmm0[1,1],xmm1[2,3]
+; AVX1-FAST-NEXT:vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
+; AVX1-FAST-NEXT:vsubps %xmm2, %xmm0, %xmm0
+; AVX1-FAST-NEXT:retq
+;
+; AVX2-LABEL: PR48823:
+; AVX2:   # %bb.0:
+; AVX2-NEXT:vshufps {{.*#+}} xmm2 = xmm0[1,1],xmm1[2,3]
+; AVX2-NEXT:vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
+; AVX2-NEXT:vsubps %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:retq
+  %3 = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> 
+  %4 = fsub <4 x float> %0, %3
+  %5 = shufflevector <4 x float> %1, <4 x float> poison, <4 x i32> 
+  %6 = fsub <4 x float> %5, %1
+  %7 = shufflevector <4 x float> %4, <4 x float> %6, <4 x i32> 
+  ret <4 x float> %7
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] ffe72f9 - [X86][SSE] Don't fold shuffle(binop(), binop()) -> binop(shuffle(), shuffle()) if the shuffles are splats

2021-01-22 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-22T11:31:38Z
New Revision: ffe72f987f4866c46c18174cdb750dea88bedba3

URL: 
https://github.com/llvm/llvm-project/commit/ffe72f987f4866c46c18174cdb750dea88bedba3
DIFF: 
https://github.com/llvm/llvm-project/commit/ffe72f987f4866c46c18174cdb750dea88bedba3.diff

LOG: [X86][SSE] Don't fold shuffle(binop(),binop()) -> 
binop(shuffle(),shuffle()) if the shuffles are splats

rGbe69e66b1cd8 added the fold, but DAGCombiner.visitVECTOR_SHUFFLE doesn't 
merge shuffles if the inner shuffle is a splat, so we need to bail.

The non-fast-horiz-ops paths see some minor regressions; we might be able to 
improve on this after lowering to target shuffles.

Fix PR48823
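
(A hedged sketch of the added guard, with the operand names from the diff
below; the point is to push the shuffle through the binop only when a later
shuffle merge is actually possible.)

auto IsMergeableShuffle = [](SDValue V) {
  auto *SVN = dyn_cast<ShuffleVectorSDNode>(V);
  return SVN && !SVN->isSplat(); // splats won't be merged by DAGCombiner
};
if ((IsMergeableShuffle(Op00) || IsMergeableShuffle(Op10)) &&
    (IsMergeableShuffle(Op01) || IsMergeableShuffle(Op11))) {
  // ... shuffle(bop(shuf,shuf),bop(shuf,shuf)) -> bop(shuffle,shuffle)
}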

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/haddsub-3.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c5cc23f6236e..895a02e5c98e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37964,23 +37964,24 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG 
&DAG,
   return HAddSub;
 
 // Merge shuffles through binops if its likely we'll be able to merge it
-// with other shuffles.
+// with other shuffles (as long as they aren't splats).
 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
 // TODO: We might be able to move this to DAGCombiner::visitVECTOR_SHUFFLE.
    if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N)) {
   unsigned SrcOpcode = N->getOperand(0).getOpcode();
   if (SrcOpcode == N->getOperand(1).getOpcode() && TLI.isBinOp(SrcOpcode) 
&&
   N->isOnlyUserOf(N->getOperand(0).getNode()) &&
-  N->isOnlyUserOf(N->getOperand(1).getNode()) &&
-  VT.getScalarSizeInBits() >= 32) {
+  N->isOnlyUserOf(N->getOperand(1).getNode())) {
 SDValue Op00 = N->getOperand(0).getOperand(0);
 SDValue Op10 = N->getOperand(1).getOperand(0);
 SDValue Op01 = N->getOperand(0).getOperand(1);
 SDValue Op11 = N->getOperand(1).getOperand(1);
-if ((Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
- Op10.getOpcode() == ISD::VECTOR_SHUFFLE) &&
-(Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
- Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
+auto *SVN00 = dyn_cast<ShuffleVectorSDNode>(Op00);
+auto *SVN10 = dyn_cast<ShuffleVectorSDNode>(Op10);
+auto *SVN01 = dyn_cast<ShuffleVectorSDNode>(Op01);
+auto *SVN11 = dyn_cast<ShuffleVectorSDNode>(Op11);
+if (((SVN00 && !SVN00->isSplat()) || (SVN10 && !SVN10->isSplat())) &&
+((SVN01 && !SVN01->isSplat()) || (SVN11 && !SVN11->isSplat()))) {
   SDLoc DL(N);
   ArrayRef Mask = SVN->getMask();
   SDValue LHS = DAG.getVectorShuffle(VT, DL, Op00, Op10, Mask);

diff  --git a/llvm/test/CodeGen/X86/haddsub-3.ll 
b/llvm/test/CodeGen/X86/haddsub-3.ll
index 651ab4ef3935..48d4fe556555 100644
--- a/llvm/test/CodeGen/X86/haddsub-3.ll
+++ b/llvm/test/CodeGen/X86/haddsub-3.ll
@@ -161,46 +161,49 @@ define <4 x float> @PR48823(<4 x float> %0, <4 x float> 
%1) {
 ; SSE2-LABEL: PR48823:
 ; SSE2:   # %bb.0:
 ; SSE2-NEXT:movaps %xmm0, %xmm2
-; SSE2-NEXT:shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
-; SSE2-NEXT:shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
+; SSE2-NEXT:shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
 ; SSE2-NEXT:subps %xmm2, %xmm0
+; SSE2-NEXT:movaps %xmm1, %xmm2
+; SSE2-NEXT:shufps {{.*#+}} xmm2 = xmm2[2,2],xmm1[2,2]
+; SSE2-NEXT:subps %xmm1, %xmm2
+; SSE2-NEXT:shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
 ; SSE2-NEXT:retq
 ;
 ; SSSE3-SLOW-LABEL: PR48823:
 ; SSSE3-SLOW:   # %bb.0:
-; SSSE3-SLOW-NEXT:movaps %xmm0, %xmm2
-; SSSE3-SLOW-NEXT:shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
-; SSSE3-SLOW-NEXT:shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
+; SSSE3-SLOW-NEXT:movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
 ; SSSE3-SLOW-NEXT:subps %xmm2, %xmm0
+; SSSE3-SLOW-NEXT:movsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
+; SSSE3-SLOW-NEXT:subps %xmm1, %xmm2
+; SSSE3-SLOW-NEXT:shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
 ; SSSE3-SLOW-NEXT:retq
 ;
 ; SSSE3-FAST-LABEL: PR48823:
 ; SSSE3-FAST:   # %bb.0:
-; SSSE3-FAST-NEXT:movaps %xmm0, %xmm2
-; SSSE3-FAST-NEXT:shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
-; SSSE3-FAST-NEXT:shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
-; SSSE3-FAST-NEXT:subps %xmm2, %xmm0
+; SSSE3-FAST-NEXT:hsubps %xmm1, %xmm0
 ; SSSE3-FAST-NEXT:retq
 ;
 ; AVX1-SLOW-LABEL: PR48823:
 ; AVX1-SLOW:   # %bb.0:
-; AVX1-SLOW-NEXT:vshufps {{.*#+}} xmm2 = xmm0[1,1],xmm1[2,3]
-; AVX1-SLOW-NEXT:vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,2]
+; AVX1-SLOW-NEXT:vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
 ; AVX1-SLOW-NEXT:vsubps %xmm2, %xmm0, %xmm0
+; AVX1-SLOW-NEXT:vmovsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
+; AVX1-SLOW-NEXT:vsubps %xmm1, %xmm2, %xmm1
+; AVX1-S

[llvm-branch-commits] [llvm] 5dbe5d2 - [DAG] Commute shuffle(splat(A, u), shuffle(C, D)) -> shuffle'(shuffle(C, D), splat(A, u))

2021-01-22 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-22T11:43:18Z
New Revision: 5dbe5d2c91209db9830d5b17093c408f22a7b471

URL: 
https://github.com/llvm/llvm-project/commit/5dbe5d2c91209db9830d5b17093c408f22a7b471
DIFF: 
https://github.com/llvm/llvm-project/commit/5dbe5d2c91209db9830d5b17093c408f22a7b471.diff

LOG: [DAG] Commute shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), 
splat(A,u))

We only merge shuffles if the inner (LHS) shuffle is a non-splat, so commute 
these shuffles to improve merging of multiple shuffles.

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/haddsub-undef.ll

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 32c7ac2f6cfb..72640af23e40 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20799,26 +20799,35 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
 }
   }
 
-  // Canonicalize shuffles according to rules:
-  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
-  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
-  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
-  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
-  N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
-  TLI.isTypeLegal(VT)) {
-// The incoming shuffle must be of the same type as the result of the
-// current shuffle.
-assert(N1->getOperand(0).getValueType() == VT &&
-   "Shuffle types don't match");
-
-SDValue SV0 = N1->getOperand(0);
-SDValue SV1 = N1->getOperand(1);
-bool HasSameOp0 = N0 == SV0;
-bool IsSV1Undef = SV1.isUndef();
-if (HasSameOp0 || IsSV1Undef || N0 == SV1)
-  // Commute the operands of this shuffle so that next rule
-  // will trigger.
+  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+// Canonicalize shuffles according to rules:
+//  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
+//  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
+//  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
+if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
+  // The incoming shuffle must be of the same type as the result of the
+  // current shuffle.
+  assert(N1->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+  SDValue SV0 = N1->getOperand(0);
+  SDValue SV1 = N1->getOperand(1);
+  bool HasSameOp0 = N0 == SV0;
+  bool IsSV1Undef = SV1.isUndef();
+  if (HasSameOp0 || IsSV1Undef || N0 == SV1)
+// Commute the operands of this shuffle so merging below will trigger.
+return DAG.getCommutedVectorShuffle(*SVN);
+}
+
+// Canonicalize splat shuffles to the RHS to improve merging below.
+//  shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
+if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
+N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+cast<ShuffleVectorSDNode>(N0)->isSplat() &&
+!cast<ShuffleVectorSDNode>(N1)->isSplat()) {
   return DAG.getCommutedVectorShuffle(*SVN);
+}
   }
 
   // Compute the combined shuffle mask for a shuffle with SV0 as the first

diff  --git a/llvm/test/CodeGen/X86/haddsub-undef.ll 
b/llvm/test/CodeGen/X86/haddsub-undef.ll
index 48ee31fe64fc..68d058433179 100644
--- a/llvm/test/CodeGen/X86/haddsub-undef.ll
+++ b/llvm/test/CodeGen/X86/haddsub-undef.ll
@@ -583,17 +583,11 @@ define <4 x float> @add_ps_016(<4 x float> %0, <4 x 
float> %1) {
 ; SSE-NEXT:movaps %xmm1, %xmm0
 ; SSE-NEXT:retq
 ;
-; AVX-SLOW-LABEL: add_ps_016:
-; AVX-SLOW:   # %bb.0:
-; AVX-SLOW-NEXT:vhaddps %xmm0, %xmm1, %xmm0
-; AVX-SLOW-NEXT:vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,3]
-; AVX-SLOW-NEXT:retq
-;
-; AVX-FAST-LABEL: add_ps_016:
-; AVX-FAST:   # %bb.0:
-; AVX-FAST-NEXT:vhaddps %xmm0, %xmm1, %xmm0
-; AVX-FAST-NEXT:vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,1]
-; AVX-FAST-NEXT:retq
+; AVX-LABEL: add_ps_016:
+; AVX:   # %bb.0:
+; AVX-NEXT:vhaddps %xmm0, %xmm1, %xmm0
+; AVX-NEXT:vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,3]
+; AVX-NEXT:retq
   %3 = shufflevector <4 x float> %1, <4 x float> %0, <2 x i32> 
   %4 = shufflevector <4 x float> %1, <4 x float> %0, <2 x i32> 
   %5 = fadd <2 x float> %3, %4



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] b1166e1 - [X86][AVX] combineX86ShufflesRecursively - attempt to constant fold before widening shuffle inputs

2021-01-22 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-22T13:19:35Z
New Revision: b1166e1317c54e9cfbb28b280af12313cf325a86

URL: 
https://github.com/llvm/llvm-project/commit/b1166e1317c54e9cfbb28b280af12313cf325a86
DIFF: 
https://github.com/llvm/llvm-project/commit/b1166e1317c54e9cfbb28b280af12313cf325a86.diff

LOG: [X86][AVX] combineX86ShufflesRecursively - attempt to constant fold before 
widening shuffle inputs

combineX86ShufflesConstants/canonicalizeShuffleMaskWithHorizOp can both 
handle/early-out shuffles with inputs of different widths, so delay widening as 
late as possible to make it easier to match constant folds etc.

The plan is to eventually move the widening inside combineX86ShuffleChain so 
that we don't create any new nodes unless we successfully combine the shuffles.

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 895a02e5c98e..a293c48a824a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36610,6 +36610,17 @@ static SDValue combineX86ShufflesRecursively(
 }
   }
 
+  // Attempt to constant fold all of the constant source ops.
+  if (SDValue Cst = combineX86ShufflesConstants(
+  Ops, Mask, Root, HasVariableMask, DAG, Subtarget))
+return Cst;
+
+  // Canonicalize the combined shuffle mask chain with horizontal ops.
+  // NOTE: This will update the Ops and Mask.
+  if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
+  Ops, Mask, RootSizeInBits, SDLoc(Root), DAG, Subtarget))
+return DAG.getBitcast(Root.getValueType(), HOp);
+
   // Widen any subvector shuffle inputs we've collected.
   if (any_of(Ops, [RootSizeInBits](SDValue Op) {
 return Op.getValueSizeInBits() < RootSizeInBits;
@@ -36622,17 +36633,6 @@ static SDValue combineX86ShufflesRecursively(
 resolveTargetShuffleInputsAndMask(Ops, Mask);
   }
 
-  // Attempt to constant fold all of the constant source ops.
-  if (SDValue Cst = combineX86ShufflesConstants(
-  Ops, Mask, Root, HasVariableMask, DAG, Subtarget))
-return Cst;
-
-  // Canonicalize the combined shuffle mask chain with horizontal ops.
-  // NOTE: This will update the Ops and Mask.
-  if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
-  Ops, Mask, RootSizeInBits, SDLoc(Root), DAG, Subtarget))
-return DAG.getBitcast(Root.getValueType(), HOp);
-
   // We can only combine unary and binary shuffle mask cases.
   if (Ops.size() <= 2) {
 // Minor canonicalization of the accumulated shuffle mask to make it easier

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll 
b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
index 2c53579f7627..c358250305a7 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
@@ -108,13 +108,12 @@ define void @PR46178(i16* %0) {
 ; X86-NEXT:vmovdqu (%eax), %ymm1
 ; X86-NEXT:vpmovqw %ymm0, %xmm0
 ; X86-NEXT:vpmovqw %ymm1, %xmm1
-; X86-NEXT:vinserti128 $1, %xmm1, %ymm0, %ymm0
-; X86-NEXT:vpsllw $8, %ymm0, %ymm0
-; X86-NEXT:vpsraw $8, %ymm0, %ymm0
-; X86-NEXT:vmovapd {{.*#+}} ymm1 = [0,0,2,0,4,0,4,0]
-; X86-NEXT:vxorpd %xmm2, %xmm2, %xmm2
-; X86-NEXT:vpermi2pd %ymm2, %ymm0, %ymm1
-; X86-NEXT:vmovupd %ymm1, (%eax)
+; X86-NEXT:vpsllw $8, %xmm1, %xmm1
+; X86-NEXT:vpsraw $8, %xmm1, %xmm1
+; X86-NEXT:vpsllw $8, %xmm0, %xmm0
+; X86-NEXT:vpsraw $8, %xmm0, %xmm0
+; X86-NEXT:vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[3]
+; X86-NEXT:vmovupd %ymm0, (%eax)
 ; X86-NEXT:vzeroupper
 ; X86-NEXT:retl
 ;



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 4846f6a - [X86][AVX] combineTargetShuffle - simplify the X86ISD::VPERM2X128 subvector matching

2021-01-22 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-22T15:47:22Z
New Revision: 4846f6ab815c34f6ffbc8d4ecde891d917bf2157

URL: 
https://github.com/llvm/llvm-project/commit/4846f6ab815c34f6ffbc8d4ecde891d917bf2157
DIFF: 
https://github.com/llvm/llvm-project/commit/4846f6ab815c34f6ffbc8d4ecde891d917bf2157.diff

LOG: [X86][AVX] combineTargetShuffle - simplify the X86ISD::VPERM2X128 
subvector matching

Simplify vperm2x128(concat(X,Y),concat(Z,W)) folding.

Use collectConcatOps / ISD::INSERT_SUBVECTOR to find the source subvectors 
instead of hardcoded immediate matching.
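
(Background, as a standalone model of my own: the VPERM2X128 immediate picks
one 128-bit source half per result lane, which is what FindSubVector128
decodes; the zeroing bits (imm bit 3/7) fall out of range here, matching the
Idx > 3 bail-out in the diff below.)

// imm[1:0] selects the low result lane, imm[5:4] the high one; selector
// values 0-1 address the halves of source 0, values 2-3 those of source 1.
struct LaneSel { int Source; int Half; };
LaneSel decodeVPerm2x128(unsigned Imm, bool HighLane) {
  unsigned Sel = HighLane ? ((Imm >> 4) & 0x3) : (Imm & 0x3);
  return {int(Sel >> 1), int(Sel & 1)};
}
// e.g. Imm = 0x31: low lane = src0's high half, high lane = src1's high
// half, so vperm2x128(concat(X,Y), concat(Z,W), 0x31) == concat(Y, W).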

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a293c48a824a..577745c42d81 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37324,41 +37324,33 @@ static SDValue combineTargetShuffle(SDValue N, 
SelectionDAG &DAG,
 if (SDValue Res = canonicalizeLaneShuffleWithRepeatedOps(N, DAG, DL))
 return Res;
 
-// If both 128-bit values were inserted into high halves of 256-bit values,
-// the shuffle can be reduced to a concatenation of subvectors:
-// vperm2x128 (ins ?, X, C1), (ins ?, Y, C2), 0x31 --> concat X, Y
-// Note: We are only looking for the exact high/high shuffle mask because 
we
-//   expect to fold other similar patterns before creating this opcode.
-SDValue Ins0 = peekThroughBitcasts(N.getOperand(0));
-SDValue Ins1 = peekThroughBitcasts(N.getOperand(1));
+// Combine vperm2x128 subvector shuffle with an inner concat pattern.
+// vperm2x128(concat(X,Y),concat(Z,W)) --> concat X,Y etc.
+auto FindSubVector128 = [&](unsigned Idx) {
+  if (Idx > 3)
+return SDValue();
+  SDValue Src = peekThroughBitcasts(N.getOperand(Idx < 2 ? 0 : 1));
+  SmallVector<SDValue> SubOps;
+  if (collectConcatOps(Src.getNode(), SubOps) && SubOps.size() == 2)
+return SubOps[Idx & 1];
+  unsigned NumElts = Src.getValueType().getVectorNumElements();
+  if ((Idx & 1) == 1 && Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+  Src.getOperand(1).getValueSizeInBits() == 128 &&
+  Src.getConstantOperandAPInt(2) == (NumElts / 2)) {
+return Src.getOperand(1);
+  }
+  return SDValue();
+};
 unsigned Imm = N.getConstantOperandVal(2);
-
-// Handle subvector splat by tweaking values to match binary concat.
-// vperm2x128 (ins ?, X, C1), undef, 0x11 ->
-// vperm2x128 (ins ?, X, C1), (ins ?, X, C1), 0x31 -> concat X, X
-if (Imm == 0x11 && Ins1.isUndef()) {
-  Imm = 0x31;
-  Ins1 = Ins0;
+if (SDValue SubLo = FindSubVector128(Imm & 0x0F)) {
+  if (SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) {
+MVT SubVT = VT.getHalfNumVectorElementsVT();
+SubLo = DAG.getBitcast(SubVT, SubLo);
+SubHi = DAG.getBitcast(SubVT, SubHi);
+return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubLo, SubHi);
+  }
 }
-
-if (!(Imm == 0x31 &&
-  Ins0.getOpcode() == ISD::INSERT_SUBVECTOR &&
-  Ins1.getOpcode() == ISD::INSERT_SUBVECTOR &&
-  Ins0.getValueType() == Ins1.getValueType()))
-  return SDValue();
-
-SDValue X = Ins0.getOperand(1);
-SDValue Y = Ins1.getOperand(1);
-unsigned C1 = Ins0.getConstantOperandVal(2);
-unsigned C2 = Ins1.getConstantOperandVal(2);
-MVT SrcVT = X.getSimpleValueType();
-unsigned SrcElts = SrcVT.getVectorNumElements();
-if (SrcVT != Y.getSimpleValueType() || SrcVT.getSizeInBits() != 128 ||
-C1 != SrcElts || C2 != SrcElts)
-  return SDValue();
-
-return DAG.getBitcast(VT, DAG.getNode(ISD::CONCAT_VECTORS, DL,
-  Ins1.getValueType(), X, Y));
+return SDValue();
   }
   case X86ISD::PSHUFD:
   case X86ISD::PSHUFLW:



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] c33d36e - [X86][AVX] canonicalizeLaneShuffleWithRepeatedOps - handle unary vperm2x128(permute/shift(x, c), undef) cases

2021-01-22 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-22T15:47:23Z
New Revision: c33d36e0667e7fff186243ac7a3a9cd63e797438

URL: 
https://github.com/llvm/llvm-project/commit/c33d36e0667e7fff186243ac7a3a9cd63e797438
DIFF: 
https://github.com/llvm/llvm-project/commit/c33d36e0667e7fff186243ac7a3a9cd63e797438.diff

LOG: [X86][AVX] canonicalizeLaneShuffleWithRepeatedOps - handle unary 
vperm2x128(permute/shift(x,c),undef) cases

Fold vperm2x128(permute/shift(x,c),undef) -> 
permute/shift(vperm2x128(x,undef),c)
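
(The fold is sound because a 128-bit lane permute commutes with any op that
acts within each lane independently. A quick standalone check with
intrinsics -- my own example; needs AVX2, compile with -mavx2.)

#include <immintrin.h>
#include <cassert>
#include <cstring>

// vperm2x128(srli(x, c), undef) == srli(vperm2x128(x, undef), c): the
// per-element shift never crosses the 128-bit lane boundary, so the lane
// permute can be hoisted above it.
int main() {
  alignas(32) int Buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  __m256i X = _mm256_load_si256((const __m256i *)Buf);
  __m256i S = _mm256_srli_epi32(X, 3);
  __m256i A = _mm256_permute2x128_si256(S, S, 0x11);
  __m256i B = _mm256_srli_epi32(_mm256_permute2x128_si256(X, X, 0x11), 3);
  assert(std::memcmp(&A, &B, sizeof(A)) == 0);
  return 0;
}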

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx-splat.ll
llvm/test/CodeGen/X86/extract-concat.ll
llvm/test/CodeGen/X86/haddsub-4.ll
llvm/test/CodeGen/X86/known-signbits-vector.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/test/CodeGen/X86/vector-shuffle-combining.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 577745c42d81..90ed8c920565 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36918,19 +36918,21 @@ static SDValue 
canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
   EVT SrcVT0 = Src0.getValueType();
   EVT SrcVT1 = Src1.getValueType();
 
-  // TODO: Under what circumstances should we push perm2f128 up when we have 
one
-  // active src?
-  if (SrcOpc0 != SrcOpc1 || SrcVT0 != SrcVT1)
+  if (!Src1.isUndef() && (SrcVT0 != SrcVT1 || SrcOpc0 != SrcOpc1))
 return SDValue();
 
   switch (SrcOpc0) {
   case X86ISD::VSHLI:
   case X86ISD::VSRLI:
   case X86ISD::VSRAI:
-if (Src0.getOperand(1) == Src1.getOperand(1)) {
-  SDValue Res = DAG.getNode(
-  X86ISD::VPERM2X128, DL, VT, DAG.getBitcast(VT, Src0.getOperand(0)),
-  DAG.getBitcast(VT, Src1.getOperand(0)), V.getOperand(2));
+  case X86ISD::PSHUFD:
+  case X86ISD::VPERMILPI:
+if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) {
+  SDValue LHS = DAG.getBitcast(VT, Src0.getOperand(0));
+  SDValue RHS =
+  DAG.getBitcast(VT, Src1.isUndef() ? Src1 : Src1.getOperand(0));
+  SDValue Res =
+  DAG.getNode(X86ISD::VPERM2X128, DL, VT, LHS, RHS, V.getOperand(2));
   Res = DAG.getNode(SrcOpc0, DL, SrcVT0, DAG.getBitcast(SrcVT0, Res),
 Src0.getOperand(1));
   return DAG.getBitcast(VT, Res);

diff  --git a/llvm/test/CodeGen/X86/avx-splat.ll 
b/llvm/test/CodeGen/X86/avx-splat.ll
index 3755cf4740ab..7602975c8872 100644
--- a/llvm/test/CodeGen/X86/avx-splat.ll
+++ b/llvm/test/CodeGen/X86/avx-splat.ll
@@ -157,8 +157,8 @@ entry:
 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: funcH:
 ; CHECK:   # %bb.0: # %entry
-; CHECK-NEXT:vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
 ; CHECK-NEXT:vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; CHECK-NEXT:vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
 ; CHECK-NEXT:ret{{[l|q]}}
 entry:
   %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> 

diff  --git a/llvm/test/CodeGen/X86/extract-concat.ll 
b/llvm/test/CodeGen/X86/extract-concat.ll
index 26e07d86bfc3..49ac851d88fc 100644
--- a/llvm/test/CodeGen/X86/extract-concat.ll
+++ b/llvm/test/CodeGen/X86/extract-concat.ll
@@ -70,12 +70,12 @@ define <16 x i64> @catcat(<4 x i64> %x) {
 ; AVX1:   # %bb.0:
 ; AVX1-NEXT:vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
 ; AVX1-NEXT:vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,2,3]
-; AVX1-NEXT:vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
-; AVX1-NEXT:vperm2f128 {{.*#+}} ymm3 = ymm1[2,3,2,3]
 ; AVX1-NEXT:vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,1]
 ; AVX1-NEXT:vinsertf128 $1, %xmm1, %ymm1, %ymm4
-; AVX1-NEXT:vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; AVX1-NEXT:vinsertf128 $1, %xmm0, %ymm0, %ymm1
+; AVX1-NEXT:vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX1-NEXT:vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT:vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:vpermilpd {{.*#+}} ymm3 = ymm0[1,1,3,3]
 ; AVX1-NEXT:vmovaps %ymm4, %ymm0
 ; AVX1-NEXT:retq
 ;

diff  --git a/llvm/test/CodeGen/X86/haddsub-4.ll 
b/llvm/test/CodeGen/X86/haddsub-4.ll
index 6003f98b9371..2e077d6247ba 100644
--- a/llvm/test/CodeGen/X86/haddsub-4.ll
+++ b/llvm/test/CodeGen/X86/haddsub-4.ll
@@ -65,8 +65,8 @@ define <8 x float> @hadd_reverse_v8f32(<8 x float> %a0, <8 x 
float> %a1) {
 ; AVX1-LABEL: hadd_reverse_v8f32:
 ; AVX1:   # %bb.0:
 ; AVX1-NEXT:vhaddps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT:vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
 ; AVX1-NEXT:vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
 ; AVX1-NEXT:retq
 ;
 ; AVX2-LABEL: hadd_reverse_v8f32:
@@ -97,10 +97,10 @@ define <8 x float> @hadd_reverse2_v8f32(<8 x float> %a0, <8 
x float> %a1) {
 ;
 ; AVX1-LABEL: hadd_rev

[llvm-branch-commits] [llvm] bd122f6 - [X86][AVX] canonicalizeLaneShuffleWithRepeatedOps - handle vperm2x128(movddup(x), movddup(y)) cases

2021-01-22 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-22T16:05:19Z
New Revision: bd122f6d217862b4631ac118c58f62a7dec16a02

URL: 
https://github.com/llvm/llvm-project/commit/bd122f6d217862b4631ac118c58f62a7dec16a02
DIFF: 
https://github.com/llvm/llvm-project/commit/bd122f6d217862b4631ac118c58f62a7dec16a02.diff

LOG: [X86][AVX] canonicalizeLaneShuffleWithRepeatedOps - handle 
vperm2x128(movddup(x),movddup(y)) cases

Fold vperm2x128(movddup(x),movddup(y)) -> movddup(vperm2x128(x,y))

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/extract-concat.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 90ed8c920565..70203dacef09 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36922,6 +36922,15 @@ static SDValue 
canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
 return SDValue();
 
   switch (SrcOpc0) {
+  case X86ISD::MOVDDUP: {
+SDValue LHS = DAG.getBitcast(VT, Src0.getOperand(0));
+SDValue RHS =
+DAG.getBitcast(VT, Src1.isUndef() ? Src1 : Src1.getOperand(0));
+SDValue Res =
+DAG.getNode(X86ISD::VPERM2X128, DL, VT, LHS, RHS, V.getOperand(2));
+Res = DAG.getNode(SrcOpc0, DL, SrcVT0, DAG.getBitcast(SrcVT0, Res));
+return DAG.getBitcast(VT, Res);
+  }
   case X86ISD::VSHLI:
   case X86ISD::VSRLI:
   case X86ISD::VSRAI:

diff  --git a/llvm/test/CodeGen/X86/extract-concat.ll 
b/llvm/test/CodeGen/X86/extract-concat.ll
index 49ac851d88fc..f979f23f82f8 100644
--- a/llvm/test/CodeGen/X86/extract-concat.ll
+++ b/llvm/test/CodeGen/X86/extract-concat.ll
@@ -68,13 +68,12 @@ define <16 x i64> @catcat(<4 x i64> %x) {
 ;
 ; AVX1-LABEL: catcat:
 ; AVX1:   # %bb.0:
-; AVX1-NEXT:vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
-; AVX1-NEXT:vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,2,3]
 ; AVX1-NEXT:vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,1]
 ; AVX1-NEXT:vinsertf128 $1, %xmm1, %ymm1, %ymm4
 ; AVX1-NEXT:vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; AVX1-NEXT:vinsertf128 $1, %xmm1, %ymm1, %ymm1
 ; AVX1-NEXT:vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:vmovddup {{.*#+}} ymm2 = ymm0[0,0,2,2]
 ; AVX1-NEXT:vpermilpd {{.*#+}} ymm3 = ymm0[1,1,3,3]
 ; AVX1-NEXT:vmovaps %ymm4, %ymm0
 ; AVX1-NEXT:retq

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll 
b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 38600884262c..80acaef8a0a0 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -109,8 +109,8 @@ define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 
x double> %b) {
 define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
 ; AVX1-LABEL: shuffle_v4f64_2200:
 ; AVX1:   # %bb.0:
-; AVX1-NEXT:vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:retq
 ;
 ; AVX2-LABEL: shuffle_v4f64_2200:
@@ -129,8 +129,8 @@ define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 
x double> %b) {
define <4 x double> @shuffle_v4f64_2222(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2222:
 ; AVX1:   # %bb.0:
-; AVX1-NEXT:vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:retq
 ;
; AVX2-LABEL: shuffle_v4f64_2222:
@@ -149,8 +149,8 @@ define <4 x double> @shuffle_v4f64_2222(<4 x double> %a, <4 
x double> %b) {
define <4 x double> @shuffle_v4f64_2222_bc(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4f64_2222_bc:
 ; AVX1:   # %bb.0:
-; AVX1-NEXT:vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:retq
 ;
; AVX2-LABEL: shuffle_v4f64_2222_bc:
@@ -856,8 +856,8 @@ define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x 
i64> %b) {
 define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: shuffle_v4i64_2200:
 ; AVX1:   # %bb.0:
-; AVX1-NEXT:vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:retq
 ;
 ; AVX2-LABEL: shuffle_v4i64_2200:



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 23b4198 - [Support] Add KnownBits::icmp helpers.

2021-01-04 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-04T12:46:27Z
New Revision: 23b41986527a3fc5615480a8f7a0b0debd5fcef4

URL: 
https://github.com/llvm/llvm-project/commit/23b41986527a3fc5615480a8f7a0b0debd5fcef4
DIFF: 
https://github.com/llvm/llvm-project/commit/23b41986527a3fc5615480a8f7a0b0debd5fcef4.diff

LOG: [Support] Add KnownBits::icmp helpers.

Check if all possible values for a pair of knownbits give the same icmp result 
- these are based off the checks performed in InstCombineCompares.cpp and 
D86578.

Add exhaustive unit test coverage - a followup will update 
InstCombineCompares.cpp to use this.
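
(A hedged usage sketch of the new API, my own example against the
declarations added below.)

#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Decide icmp ugt over two partially known i8 values:
// LHS = 1??????? (umin 128), RHS = 0??????? (umax 127), so ugt must hold.
bool knownUGT() {
  KnownBits LHS(8), RHS(8);
  LHS.One.setBit(7);  // bit 7 of LHS known to be one
  RHS.Zero.setBit(7); // bit 7 of RHS known to be zero
  Optional<bool> Res = KnownBits::ugt(LHS, RHS);
  return Res.hasValue() && Res.getValue(); // true
}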

Added: 


Modified: 
llvm/include/llvm/Support/KnownBits.h
llvm/lib/Support/KnownBits.cpp
llvm/unittests/Support/KnownBitsTest.cpp

Removed: 




diff  --git a/llvm/include/llvm/Support/KnownBits.h 
b/llvm/include/llvm/Support/KnownBits.h
index ec88b9807174..edb771d659e2 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -15,6 +15,7 @@
 #define LLVM_SUPPORT_KNOWNBITS_H
 
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Optional.h"
 
 namespace llvm {
 
@@ -328,6 +329,36 @@ struct KnownBits {
   /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
   static KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS);
 
+  /// Determine if these known bits always give the same ICMP_EQ result.
+  static Optional<bool> eq(const KnownBits &LHS, const KnownBits &RHS);
+
+  /// Determine if these known bits always give the same ICMP_NE result.
+  static Optional<bool> ne(const KnownBits &LHS, const KnownBits &RHS);
+
+  /// Determine if these known bits always give the same ICMP_UGT result.
+  static Optional<bool> ugt(const KnownBits &LHS, const KnownBits &RHS);
+
+  /// Determine if these known bits always give the same ICMP_UGE result.
+  static Optional<bool> uge(const KnownBits &LHS, const KnownBits &RHS);
+
+  /// Determine if these known bits always give the same ICMP_ULT result.
+  static Optional<bool> ult(const KnownBits &LHS, const KnownBits &RHS);
+
+  /// Determine if these known bits always give the same ICMP_ULE result.
+  static Optional<bool> ule(const KnownBits &LHS, const KnownBits &RHS);
+
+  /// Determine if these known bits always give the same ICMP_SGT result.
+  static Optional<bool> sgt(const KnownBits &LHS, const KnownBits &RHS);
+
+  /// Determine if these known bits always give the same ICMP_SGE result.
+  static Optional<bool> sge(const KnownBits &LHS, const KnownBits &RHS);
+
+  /// Determine if these known bits always give the same ICMP_SLT result.
+  static Optional<bool> slt(const KnownBits &LHS, const KnownBits &RHS);
+
+  /// Determine if these known bits always give the same ICMP_SLE result.
+  static Optional<bool> sle(const KnownBits &LHS, const KnownBits &RHS);
+
   /// Insert the bits from a smaller known bits starting at bitPosition.
   void insertBits(const KnownBits &SubBits, unsigned BitPosition) {
 Zero.insertBits(SubBits.Zero, BitPosition);

diff  --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index 2c25b7d9bac5..0147d21d153a 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -268,6 +268,75 @@ KnownBits KnownBits::ashr(const KnownBits &LHS, const 
KnownBits &RHS) {
   return Known;
 }
 
+Optional<bool> KnownBits::eq(const KnownBits &LHS, const KnownBits &RHS) {
+  if (LHS.isConstant() && RHS.isConstant())
+return Optional<bool>(LHS.getConstant() == RHS.getConstant());
+  if (LHS.getMaxValue().ult(RHS.getMinValue()) ||
+  LHS.getMinValue().ugt(RHS.getMaxValue()))
+return Optional<bool>(false);
+  if (LHS.One.intersects(RHS.Zero) || RHS.One.intersects(LHS.Zero))
+return Optional<bool>(false);
+  return None;
+}
+
+Optional<bool> KnownBits::ne(const KnownBits &LHS, const KnownBits &RHS) {
+  if (Optional<bool> KnownEQ = eq(LHS, RHS))
+return Optional<bool>(!KnownEQ.getValue());
+  return None;
+}
+
+Optional<bool> KnownBits::ugt(const KnownBits &LHS, const KnownBits &RHS) {
+  if (LHS.isConstant() && RHS.isConstant())
+return Optional<bool>(LHS.getConstant().ugt(RHS.getConstant()));
+  // LHS >u RHS -> false if umax(LHS) <= umax(RHS)
+  if (LHS.getMaxValue().ule(RHS.getMinValue()))
+return Optional<bool>(false);
+  // LHS >u RHS -> true if umin(LHS) > umax(RHS)
+  if (LHS.getMinValue().ugt(RHS.getMaxValue()))
+return Optional<bool>(true);
+  return None;
+}
+
+Optional<bool> KnownBits::uge(const KnownBits &LHS, const KnownBits &RHS) {
+  if (Optional<bool> IsUGT = ugt(RHS, LHS))
+return Optional<bool>(!IsUGT.getValue());
+  return None;
+}
+
+Optional<bool> KnownBits::ult(const KnownBits &LHS, const KnownBits &RHS) {
+  return ugt(RHS, LHS);
+}
+
+Optional<bool> KnownBits::ule(const KnownBits &LHS, const KnownBits &RHS) {
+  return uge(RHS, LHS);
+}
+
+Optional<bool> KnownBits::sgt(const KnownBits &LHS, const KnownBits &RHS) {
+  if (LHS.isConstant() && RHS.isConstant())
+return Optional<bool>(LHS.getConstant().sgt(RHS.getConstant()));
+  // LHS >s RHS -> false if smax(LHS) <= smax(RHS)

[llvm-branch-commits] [llvm] 4d7cb6d - [Sparc] SparcMCExpr::printVariantKind - fix Wcovered-switch-default gcc warning. NFCI.

2021-01-04 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-04T14:08:44Z
New Revision: 4d7cb6da9fcf980a8ddaa09ffa2dcab1525a66db

URL: 
https://github.com/llvm/llvm-project/commit/4d7cb6da9fcf980a8ddaa09ffa2dcab1525a66db
DIFF: 
https://github.com/llvm/llvm-project/commit/4d7cb6da9fcf980a8ddaa09ffa2dcab1525a66db.diff

LOG: [Sparc] SparcMCExpr::printVariantKind - fix Wcovered-switch-default gcc 
warning. NFCI.
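
(Background: the warning fires when a switch over an enum lists every
enumerator and still carries a default. A minimal standalone reproduction
with a hypothetical enum, not the Sparc code.)

enum Color { Red, Green, Blue };

int toIndex(Color C) {
  switch (C) {
  case Red:   return 0;
  case Green: return 1;
  case Blue:  return 2;
  default:    return -1; // warning: default label in switch which covers
  }                      // all enumeration values
}
// The commit's approach: return from every case and drop the default, so
// the fully covered switch no longer needs a dead default branch.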

Added: 


Modified: 
llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp

Removed: 




diff  --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp 
b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index 2f28a06f1573..b84ecf074455 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -41,49 +41,46 @@ void SparcMCExpr::printImpl(raw_ostream &OS, const 
MCAsmInfo *MAI) const {
 
 bool SparcMCExpr::printVariantKind(raw_ostream &OS, VariantKind Kind)
 {
-  bool closeParen = true;
   switch (Kind) {
-  default:
-llvm_unreachable("Unhandled SparcMCExpr::VariantKind");
-  case VK_Sparc_None: closeParen = false; break;
-  case VK_Sparc_LO:   OS << "%lo(";  break;
-  case VK_Sparc_HI:   OS << "%hi(";  break;
-  case VK_Sparc_H44:  OS << "%h44("; break;
-  case VK_Sparc_M44:  OS << "%m44("; break;
-  case VK_Sparc_L44:  OS << "%l44("; break;
-  case VK_Sparc_HH:   OS << "%hh(";  break;
-  case VK_Sparc_HM:   OS << "%hm(";  break;
+  case VK_Sparc_None: return false;
+  case VK_Sparc_LO:   OS << "%lo(";  return true;
+  case VK_Sparc_HI:   OS << "%hi(";  return true;
+  case VK_Sparc_H44:  OS << "%h44("; return true;
+  case VK_Sparc_M44:  OS << "%m44("; return true;
+  case VK_Sparc_L44:  OS << "%l44("; return true;
+  case VK_Sparc_HH:   OS << "%hh(";  return true;
+  case VK_Sparc_HM:   OS << "%hm(";  return true;
 // FIXME: use %pc22/%pc10, if system assembler supports them.
-  case VK_Sparc_PC22: OS << "%hi("; break;
-  case VK_Sparc_PC10: OS << "%lo("; break;
+  case VK_Sparc_PC22: OS << "%hi("; return true;
+  case VK_Sparc_PC10: OS << "%lo("; return true;
 // FIXME: use %got22/%got10, if system assembler supports them.
-  case VK_Sparc_GOT22:OS << "%hi("; break;
-  case VK_Sparc_GOT10:OS << "%lo("; break;
-  case VK_Sparc_GOT13:closeParen = false; break;
-  case VK_Sparc_13:   closeParen = false; break;
-  case VK_Sparc_WDISP30:  closeParen = false; break;
-  case VK_Sparc_WPLT30:   closeParen = false; break;
-  case VK_Sparc_R_DISP32: OS << "%r_disp32("; break;
-  case VK_Sparc_TLS_GD_HI22:   OS << "%tgd_hi22(";   break;
-  case VK_Sparc_TLS_GD_LO10:   OS << "%tgd_lo10(";   break;
-  case VK_Sparc_TLS_GD_ADD:OS << "%tgd_add(";break;
-  case VK_Sparc_TLS_GD_CALL:   OS << "%tgd_call(";   break;
-  case VK_Sparc_TLS_LDM_HI22:  OS << "%tldm_hi22(";  break;
-  case VK_Sparc_TLS_LDM_LO10:  OS << "%tldm_lo10(";  break;
-  case VK_Sparc_TLS_LDM_ADD:   OS << "%tldm_add(";   break;
-  case VK_Sparc_TLS_LDM_CALL:  OS << "%tldm_call(";  break;
-  case VK_Sparc_TLS_LDO_HIX22: OS << "%tldo_hix22("; break;
-  case VK_Sparc_TLS_LDO_LOX10: OS << "%tldo_lox10("; break;
-  case VK_Sparc_TLS_LDO_ADD:   OS << "%tldo_add(";   break;
-  case VK_Sparc_TLS_IE_HI22:   OS << "%tie_hi22(";   break;
-  case VK_Sparc_TLS_IE_LO10:   OS << "%tie_lo10(";   break;
-  case VK_Sparc_TLS_IE_LD: OS << "%tie_ld("; break;
-  case VK_Sparc_TLS_IE_LDX:OS << "%tie_ldx(";break;
-  case VK_Sparc_TLS_IE_ADD:OS << "%tie_add(";break;
-  case VK_Sparc_TLS_LE_HIX22:  OS << "%tle_hix22(";  break;
-  case VK_Sparc_TLS_LE_LOX10:  OS << "%tle_lox10(";  break;
+  case VK_Sparc_GOT22:OS << "%hi("; return true;
+  case VK_Sparc_GOT10:OS << "%lo("; return true;
+  case VK_Sparc_GOT13:return false;
+  case VK_Sparc_13:   return false;
+  case VK_Sparc_WDISP30:  return false;
+  case VK_Sparc_WPLT30:   return false;
+  case VK_Sparc_R_DISP32: OS << "%r_disp32("; return true;
+  case VK_Sparc_TLS_GD_HI22:   OS << "%tgd_hi22(";   return true;
+  case VK_Sparc_TLS_GD_LO10:   OS << "%tgd_lo10(";   return true;
+  case VK_Sparc_TLS_GD_ADD:OS << "%tgd_add(";return true;
+  case VK_Sparc_TLS_GD_CALL:   OS << "%tgd_call(";   return true;
+  case VK_Sparc_TLS_LDM_HI22:  OS << "%tldm_hi22(";  return true;
+  case VK_Sparc_TLS_LDM_LO10:  OS << "%tldm_lo10(";  return true;
+  case VK_Sparc_TLS_LDM_ADD:   OS << "%tldm_add(";   return true;
+  case VK_Sparc_TLS_LDM_CALL:  OS << "%tldm_call(";  return true;
+  case VK_Sparc_TLS_LDO_HIX22: OS << "%tldo_hix22("; return true;
+  case VK_Sparc_TLS_LDO_LOX10: OS << "%tldo_lox10("; return true;
+  case VK_Sparc_TLS_LDO_ADD:   OS << "%tldo_add(";   return true;
+  case VK_Sparc_TLS_IE_HI22:   OS << "%tie_hi22(";   return true;
+  case VK_Sparc_TLS_IE_LO10:   OS << "%tie_lo10(";   return true;
+  case VK_Sparc_TLS_IE_LD: OS << "%tie_ld("; return true;

[llvm-branch-commits] [clang] 9f8c0d1 - DeclCXX - Fix getAs<> null-dereference static analyzer warnings. NFCI.

2021-01-04 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-04T15:12:55Z
New Revision: 9f8c0d15c7f706a124ba29e8f40dc1937cd5bd49

URL: 
https://github.com/llvm/llvm-project/commit/9f8c0d15c7f706a124ba29e8f40dc1937cd5bd49
DIFF: 
https://github.com/llvm/llvm-project/commit/9f8c0d15c7f706a124ba29e8f40dc1937cd5bd49.diff

LOG: DeclCXX - Fix getAs<> null-dereference static analyzer warnings. NFCI.

getAs<> can return null if the cast is invalid, which can lead to null pointer 
dereferences. Use castAs<> instead, which will assert that the cast is valid.
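
(A hedged illustration of the two accessors -- clang AST API; QT is assumed
to wrap a function type, as the lambda call operator's type does here.)

#include "clang/AST/Type.h"
using namespace clang;

CallingConv getCC(QualType QT) {
  // getAs<T>() returns nullptr when QT is not a T, so chaining a call off
  // it dereferences null whenever that assumption is violated:
  //   QT->getAs<FunctionType>()->getCallConv();
  // castAs<T>() instead asserts the cast is valid (in +Asserts builds):
  return QT->castAs<FunctionType>()->getCallConv();
}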

Added: 


Modified: 
clang/lib/AST/DeclCXX.cpp

Removed: 




diff  --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index 16eb8206dba2..b806adf36bfb 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -1508,7 +1508,7 @@ CXXMethodDecl *CXXRecordDecl::getLambdaCallOperator() 
const {
 
 CXXMethodDecl* CXXRecordDecl::getLambdaStaticInvoker() const {
   CXXMethodDecl *CallOp = getLambdaCallOperator();
-  CallingConv CC = CallOp->getType()->getAs<FunctionType>()->getCallConv();
+  CallingConv CC = CallOp->getType()->castAs<FunctionType>()->getCallConv();
   return getLambdaStaticInvoker(CC);
 }
 
@@ -1532,8 +1532,8 @@ CXXMethodDecl 
*CXXRecordDecl::getLambdaStaticInvoker(CallingConv CC) const {
   DeclContext::lookup_result Invoker = getLambdaStaticInvokers(*this);
 
   for (NamedDecl *ND : Invoker) {
-const FunctionType *FTy =
-cast<CXXMethodDecl>(ND->getAsFunction())->getType()->getAs<FunctionType>();
+const auto *FTy =
+
cast<CXXMethodDecl>(ND->getAsFunction())->getType()->castAs<FunctionType>();
 if (FTy->getCallConv() == CC)
   return getInvokerAsMethod(ND);
   }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] e9f401d - [IR] CallBase::getBundleOpInfoForOperand - ensure Current iterator is defined. NFCI.

2021-01-04 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-04T15:30:15Z
New Revision: e9f401d8a261e747f5dfc9e297f12ab26e56893d

URL: 
https://github.com/llvm/llvm-project/commit/e9f401d8a261e747f5dfc9e297f12ab26e56893d
DIFF: 
https://github.com/llvm/llvm-project/commit/e9f401d8a261e747f5dfc9e297f12ab26e56893d.diff

LOG: [IR] CallBase::getBundleOpInfoForOperand - ensure Current iterator is 
defined. NFCI.

Fix clang static analyzer undefined pointer warning in the case Begin == End.
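
(Reduced to a standalone sketch -- a hypothetical search loop, not the
CallBase code -- the issue is the empty-range path.)

// If Begin == End the loop body never runs, so an uninitialized Current
// would be read afterwards: undefined behaviour. Initializing it to Begin,
// as the commit does, keeps the fall-through path well-defined.
const int *findGE(const int *Begin, const int *End, int Target) {
  const int *Current = Begin; // was: const int *Current; (uninitialized)
  while (Begin != End) {
    Current = Begin;
    if (*Current >= Target)
      return Current;
    ++Begin;
  }
  return Current; // reached when the range is empty or no match was found
}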

Added: 


Modified: 
llvm/lib/IR/Instructions.cpp

Removed: 




diff  --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 47bf3966bc27..d6b4a4f5030f 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -400,7 +400,7 @@ CallBase::BundleOpInfo 
&CallBase::getBundleOpInfoForOperand(unsigned OpIdx) {
 
   bundle_op_iterator Begin = bundle_op_info_begin();
   bundle_op_iterator End = bundle_op_info_end();
-  bundle_op_iterator Current;
+  bundle_op_iterator Current = Begin;
 
   while (Begin != End) {
 unsigned ScaledOperandPerBundle =



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] f7463ca - [ProfileData] GCOVFile::readGCNO - silence undefined pointer warning. NFCI.

2021-01-04 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-04T16:50:05Z
New Revision: f7463ca3cc5ba8455c4611c5afa79c48d8a79326

URL: 
https://github.com/llvm/llvm-project/commit/f7463ca3cc5ba8455c4611c5afa79c48d8a79326
DIFF: 
https://github.com/llvm/llvm-project/commit/f7463ca3cc5ba8455c4611c5afa79c48d8a79326.diff

LOG: [ProfileData] GCOVFile::readGCNO - silence undefined pointer warning. NFCI.

Silence clang static analyzer warning that 'fn' could still be in an undefined 
state - given the likely tag order this shouldn't happen, but the 
analyzer can't know that.

Added: 


Modified: 
llvm/lib/ProfileData/GCOV.cpp

Removed: 




diff  --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp
index 2e1ba3338394..3332a898603b 100644
--- a/llvm/lib/ProfileData/GCOV.cpp
+++ b/llvm/lib/ProfileData/GCOV.cpp
@@ -111,7 +111,7 @@ bool GCOVFile::readGCNO(GCOVBuffer &buf) {
 buf.getWord(); // hasUnexecutedBlocks
 
   uint32_t tag, length;
-  GCOVFunction *fn;
+  GCOVFunction *fn = nullptr;
   while ((tag = buf.getWord())) {
 if (!buf.readInt(length))
   return false;



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 6725860 - Sema::BuildCallExpr - use cast<> instead of dyn_cast<> for dereferenced pointer. NFCI.

2021-01-05 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-05T09:34:00Z
New Revision: 6725860d21a03741d6c3331ab0560416bb19e068

URL: 
https://github.com/llvm/llvm-project/commit/6725860d21a03741d6c3331ab0560416bb19e068
DIFF: 
https://github.com/llvm/llvm-project/commit/6725860d21a03741d6c3331ab0560416bb19e068.diff

LOG: Sema::BuildCallExpr - use cast<> instead of dyn_cast<> for dereferenced 
pointer. NFCI.

We're immediately dereferencing the casted pointer, so use cast<> which will 
assert instead of dyn_cast<> which can return null.

Fixes static analyzer warning.
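
(A hedged sketch of the settled idiom -- LLVM casting utilities; the type
check happens once, up front.)

#include "llvm/Support/Casting.h"

// Once isa_and_nonnull<T>(P) has established that P is non-null and a T,
// cast<T> is the right accessor: it asserts on misuse instead of silently
// returning nullptr the way dyn_cast<T> can.
template <typename T, typename U> T *getAsChecked(U *P) {
  if (!llvm::isa_and_nonnull<T>(P))
    return nullptr;
  return llvm::cast<T>(P); // cannot fail at this point
}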

Added: 


Modified: 
clang/lib/Sema/SemaExpr.cpp

Removed: 




diff  --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 3992a373f721..28f4c5bbf19b 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6484,7 +6484,7 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, 
SourceLocation LParenLoc,
"should only occur in error-recovery path.");
 QualType ReturnType =
llvm::isa_and_nonnull<FunctionDecl>(NDecl)
-? dyn_cast<FunctionDecl>(NDecl)->getCallResultType()
+? cast<FunctionDecl>(NDecl)->getCallResultType()
 : Context.DependentTy;
 return CallExpr::Create(Context, Fn, ArgExprs, ReturnType,
 Expr::getValueKindForType(ReturnType), RParenLoc,



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 52e4489 - SystemZTargetLowering::lowerDYNAMIC_STACKALLOC - use cast<> instead of dyn_cast<> for dereferenced pointer. NFCI.

2021-01-05 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-05T09:34:01Z
New Revision: 52e448974b2ec826c8af429c370c4d6e79ce5747

URL: 
https://github.com/llvm/llvm-project/commit/52e448974b2ec826c8af429c370c4d6e79ce5747
DIFF: 
https://github.com/llvm/llvm-project/commit/52e448974b2ec826c8af429c370c4d6e79ce5747.diff

LOG: SystemZTargetLowering::lowerDYNAMIC_STACKALLOC - use cast<> instead of 
dyn_cast<> for dereferenced pointer. NFCI.

We're immediately dereferencing the casted pointer, so use cast<> which will 
assert instead of dyn_cast<> which can return null.

Fixes static analyzer warning.

Added: 


Modified: 
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Removed: 




diff  --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp 
b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 663af1d64943..603446755aaf 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -3419,8 +3419,8 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) 
const {
 
   // If user has set the no alignment function attribute, ignore
   // alloca alignments.
-  uint64_t AlignVal = (RealignOpt ?
-   dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);
+  uint64_t AlignVal =
+  (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
 
   uint64_t StackAlign = TFI->getStackAlignment();
   uint64_t RequiredAlign = std::max(AlignVal, StackAlign);



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 84d5768 - MemProfiler::insertDynamicShadowAtFunctionEntry - use cast<> instead of dyn_cast<> for dereferenced pointer. NFCI.

2021-01-05 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-05T09:34:01Z
New Revision: 84d5768d97635602225f5056da96b058e588b2f5

URL: 
https://github.com/llvm/llvm-project/commit/84d5768d97635602225f5056da96b058e588b2f5
DIFF: 
https://github.com/llvm/llvm-project/commit/84d5768d97635602225f5056da96b058e588b2f5.diff

LOG: MemProfiler::insertDynamicShadowAtFunctionEntry - use cast<> instead of 
dyn_cast<> for dereferenced pointer. NFCI.

We're immediately dereferencing the casted pointer, so use cast<> which will 
assert instead of dyn_cast<> which can return null.

Fixes static analyzer warning.

Added: 


Modified: 
llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp 
b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 56006bbc94c7..0e6a404a9e0b 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -577,7 +577,7 @@ bool 
MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
   Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
   MemProfShadowMemoryDynamicAddress, IntptrTy);
   if (F.getParent()->getPICLevel() == PICLevel::NotPIC)
-dyn_cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
+cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
   DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
   return true;
 }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 7a97eeb - [Coroutines] checkAsyncFuncPointer - use cast<> instead of dyn_cast<> for dereferenced pointer. NFCI.

2021-01-05 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-05T10:31:45Z
New Revision: 7a97eeb197a8023acbb800d40b3bb852fc2f5d60

URL: 
https://github.com/llvm/llvm-project/commit/7a97eeb197a8023acbb800d40b3bb852fc2f5d60
DIFF: 
https://github.com/llvm/llvm-project/commit/7a97eeb197a8023acbb800d40b3bb852fc2f5d60.diff

LOG: [Coroutines] checkAsyncFuncPointer - use cast<> instead of dyn_cast<> for 
dereferenced pointer. NFCI.

We're immediately dereferencing the casted pointer, so use cast<> which will 
assert instead of dyn_cast<> which can return null.

Fixes static analyzer warning.

Added: 


Modified: 
llvm/lib/Transforms/Coroutines/Coroutines.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp 
b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index f0095a649b0c..6699a5c46313 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -676,8 +676,8 @@ static void checkAsyncFuncPointer(const Instruction *I, 
Value *V) {
   if (!AsyncFuncPtrAddr)
 fail(I, "llvm.coro.id.async async function pointer not a global", V);
 
-  auto *StructTy = dyn_cast<StructType>(
-  AsyncFuncPtrAddr->getType()->getPointerElementType());
+  auto *StructTy =
+  cast<StructType>(AsyncFuncPtrAddr->getType()->getPointerElementType());
   if (StructTy->isOpaque() || !StructTy->isPacked() ||
   StructTy->getNumElements() != 2 ||
   !StructTy->getElementType(0)->isIntegerTy(32) ||



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] a000366 - [SimplifyIndVar] createWideIV - make WideIVInfo arg a const ref. NFCI.

2021-01-05 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-05T10:31:45Z
New Revision: a000366d0502b35fc0d3b113ace7f0e3bbdc08cd

URL: 
https://github.com/llvm/llvm-project/commit/a000366d0502b35fc0d3b113ace7f0e3bbdc08cd
DIFF: 
https://github.com/llvm/llvm-project/commit/a000366d0502b35fc0d3b113ace7f0e3bbdc08cd.diff

LOG: [SimplifyIndVar] createWideIV - make WideIVInfo arg a const ref. NFCI.

The WideIVInfo arg is only ever used as a const.

Fixes cppcheck warning.

Added: 


Modified: 
llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
llvm/lib/Transforms/Utils/SimplifyIndVar.cpp

Removed: 




diff  --git a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h 
b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
index 4599627b65f5..4ba56fb45afa 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -74,7 +74,7 @@ struct WideIVInfo {
 
 /// Widen Induction Variables - Extend the width of an IV to cover its
 /// widest uses.
-PHINode *createWideIV(WideIVInfo &WI,
+PHINode *createWideIV(const WideIVInfo &WI,
 LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter,
DominatorTree *DT, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
 unsigned &NumElimExt, unsigned &NumWidened,

diff  --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp 
b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index f3b198094bd1..290c04a7ad10 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -2076,7 +2076,7 @@ void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
   }
 }
 
-PHINode *llvm::createWideIV(WideIVInfo &WI,
+PHINode *llvm::createWideIV(const WideIVInfo &WI,
 LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter,
DominatorTree *DT, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
 unsigned &NumElimExt, unsigned &NumWidened,



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 313d982 - [IR] Add ConstantInt::getBool helpers to wrap getTrue/getFalse.

2021-01-05 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-05T11:01:10Z
New Revision: 313d982df65a7a8f1da2da5f0e03e6b6e301ce3c

URL: 
https://github.com/llvm/llvm-project/commit/313d982df65a7a8f1da2da5f0e03e6b6e301ce3c
DIFF: 
https://github.com/llvm/llvm-project/commit/313d982df65a7a8f1da2da5f0e03e6b6e301ce3c.diff

LOG: [IR] Add ConstantInt::getBool helpers to wrap getTrue/getFalse.
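
A minimal usage sketch (mirroring the InstCombine change below; Ty and Pred
are stand-ins):

  // Before: branch on the predicate to pick the i1 constant.
  Constant *CmpTrue = Pred == CmpInst::ICMP_NE ? ConstantInt::getTrue(Ty)
                                               : ConstantInt::getFalse(Ty);
  // After: fold the branch into the helper.
  Constant *CmpBool = ConstantInt::getBool(Ty, Pred == CmpInst::ICMP_NE);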

Added: 


Modified: 
llvm/include/llvm/IR/Constants.h
llvm/lib/IR/Constants.cpp
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Removed: 




diff  --git a/llvm/include/llvm/IR/Constants.h 
b/llvm/include/llvm/IR/Constants.h
index 3fbbf53c29b4..ac802232c23d 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -88,8 +88,10 @@ class ConstantInt final : public ConstantData {
 
   static ConstantInt *getTrue(LLVMContext &Context);
   static ConstantInt *getFalse(LLVMContext &Context);
+  static ConstantInt *getBool(LLVMContext &Context, bool V);
   static Constant *getTrue(Type *Ty);
   static Constant *getFalse(Type *Ty);
+  static Constant *getBool(Type *Ty, bool V);
 
   /// If Ty is a vector type, return a Constant with a splat of the given
   /// value. Otherwise return a ConstantInt for the given value.

diff  --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index 82a5f9db0bf7..a38302d17937 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -815,6 +815,10 @@ ConstantInt *ConstantInt::getFalse(LLVMContext &Context) {
   return pImpl->TheFalseVal;
 }
 
+ConstantInt *ConstantInt::getBool(LLVMContext &Context, bool V) {
+  return V ? getTrue(Context) : getFalse(Context);
+}
+
 Constant *ConstantInt::getTrue(Type *Ty) {
   assert(Ty->isIntOrIntVectorTy(1) && "Type not i1 or vector of i1.");
   ConstantInt *TrueC = ConstantInt::getTrue(Ty->getContext());
@@ -831,6 +835,10 @@ Constant *ConstantInt::getFalse(Type *Ty) {
   return FalseC;
 }
 
+Constant *ConstantInt::getBool(Type *Ty, bool V) {
+  return V ? getTrue(Ty) : getFalse(Ty);
+}
+
 // Get a ConstantInt from an APInt.
 ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) {
   // get an existing value or the insertion position

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 83b310bfcd05..87d4b40a9a64 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5037,11 +5037,9 @@ Instruction 
*InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
 llvm_unreachable("Unknown icmp opcode!");
   case ICmpInst::ICMP_EQ:
   case ICmpInst::ICMP_NE: {
-if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) {
-  return Pred == CmpInst::ICMP_EQ
- ? replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()))
- : replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
-}
+if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
+  return replaceInstUsesWith(
+  I, ConstantInt::getBool(I.getType(), Pred == CmpInst::ICMP_NE));
 
 // If all bits are known zero except for one, then we know at most one bit
 // is set. If the comparison is against zero, then this is a check to see 
if



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] dc74d7e - [X86] getMemoryOpCost - use dyn_cast_or_null. NFCI.

2021-01-05 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-05T13:23:09Z
New Revision: dc74d7ed1f651aa61d15b4eaaa32200df1f38d37

URL: 
https://github.com/llvm/llvm-project/commit/dc74d7ed1f651aa61d15b4eaaa32200df1f38d37
DIFF: 
https://github.com/llvm/llvm-project/commit/dc74d7ed1f651aa61d15b4eaaa32200df1f38d37.diff

LOG: [X86] getMemoryOpCost - use dyn_cast_or_null. NFCI.

Use dyn_cast_or_null<StoreInst> instead of the isa_and_nonnull<StoreInst>
check plus cast, and use the StoreInst::getPointerOperand wrapper instead of
a hardcoded Instruction::getOperand.

Looks cleaner and avoids a spurious clang static analyzer null dereference 
warning.
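
For reference, a sketch of the difference (assuming an Instruction *I that
may be null):

  // isa_and_nonnull<T>(V) only answers yes/no; a separate cast is still
  // needed to obtain a typed pointer.
  if (isa_and_nonnull<StoreInst>(I)) {
    Value *Ptr = cast<StoreInst>(I)->getPointerOperand();
    (void)Ptr;
  }

  // dyn_cast_or_null<T>(V) tolerates a null V and combines the check and
  // the cast, yielding the typed pointer in one step.
  if (auto *SI = dyn_cast_or_null<StoreInst>(I)) {
    Value *Ptr = SI->getPointerOperand();
    (void)Ptr;
  }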

Added: 


Modified: 
llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Removed: 




diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp 
b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 5a342d41fb5e..71455237fb61 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3188,11 +3188,10 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type 
*Src,
 const Instruction *I) {
   // TODO: Handle other cost kinds.
   if (CostKind != TTI::TCK_RecipThroughput) {
-if (isa_and_nonnull<StoreInst>(I)) {
-  Value *Ptr = I->getOperand(1);
+if (auto *SI = dyn_cast_or_null<StoreInst>(I)) {
   // Store instruction with index and scale costs 2 Uops.
   // Check the preceding GEP to identify non-const indices.
-  if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
 if (!all_of(GEP->indices(), [](Value *V) { return isa<ConstantInt>(V); }))
   return TTI::TCC_Basic * 2;
   }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 73a44f4 - [X86][AVX] combineVectorSignBitsTruncation - use PACKSS/PACKUS in more AVX cases

2021-01-05 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-05T15:01:45Z
New Revision: 73a44f437bf19ecf2c865e6c8b9b8a2e4a811960

URL: 
https://github.com/llvm/llvm-project/commit/73a44f437bf19ecf2c865e6c8b9b8a2e4a811960
DIFF: 
https://github.com/llvm/llvm-project/commit/73a44f437bf19ecf2c865e6c8b9b8a2e4a811960.diff

LOG: [X86][AVX] combineVectorSignBitsTruncation - use PACKSS/PACKUS in more AVX 
cases

AVX512 has fast truncation ops, but if the truncation source is a concatenation
of subvectors then it's likely that we can use PACK more efficiently.

This is only guaranteed to work for truncations to 128/256-bit vectors as the 
PACK works across 128-bit sub-lanes, for now I've just disabled 512-bit 
truncation cases but we need to get them working eventually for D61129.
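
To see why 512-bit results stay excluded, a simplified scalar model of
256-bit PACKSSDW (illustrative C++, not the LLVM API):

  #include <algorithm>
  #include <array>
  #include <cstdint>

  // Within each 128-bit lane, PACKSSDW takes 4 elements of A then 4 of B,
  // saturating each i32 to i16. Wider vectors repeat this per lane, so A
  // and B end up interleaved lane-by-lane rather than concatenated - hence
  // the combine is only attempted for 128/256-bit results here.
  std::array<int16_t, 16> packssdw256(const std::array<int32_t, 8> &A,
                                      const std::array<int32_t, 8> &B) {
    auto Sat = [](int32_t V) {
      return (int16_t)std::clamp<int32_t>(V, -32768, 32767);
    };
    std::array<int16_t, 16> R{};
    for (int Lane = 0; Lane != 2; ++Lane)
      for (int I = 0; I != 4; ++I) {
        R[Lane * 8 + I] = Sat(A[Lane * 4 + I]);     // this lane's A half
        R[Lane * 8 + 4 + I] = Sat(B[Lane * 4 + I]); // this lane's B half
      }
    return R;
  }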

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-pack-128.ll
llvm/test/CodeGen/X86/vector-pack-256.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4dce5283b2ab..16f1023ed5f8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45706,8 +45706,13 @@ static SDValue combineVectorSignBitsTruncation(SDNode 
*N, const SDLoc &DL,
   // there's no harm in trying pack.
   if (Subtarget.hasAVX512() &&
   !(!Subtarget.useAVX512Regs() && VT.is256BitVector() &&
-InVT.is512BitVector()))
-return SDValue();
+InVT.is512BitVector())) {
+// PACK should still be worth it for 128/256-bit vectors if the sources 
were
+// originally concatenated from subvectors.
+SmallVector<SDValue, 4> ConcatOps;
+if (VT.getSizeInBits() > 256 || !collectConcatOps(In.getNode(), ConcatOps))
+  return SDValue();
+  }
 
   unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16);
   unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;

diff  --git a/llvm/test/CodeGen/X86/vector-pack-128.ll 
b/llvm/test/CodeGen/X86/vector-pack-128.ll
index 9b0bbac0199d..a49d0f9e3605 100644
--- a/llvm/test/CodeGen/X86/vector-pack-128.ll
+++ b/llvm/test/CodeGen/X86/vector-pack-128.ll
@@ -35,9 +35,7 @@ define <8 x i16> @trunc_concat_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) nounwind {
 ; AVX512:   # %bb.0:
 ; AVX512-NEXT:vpsrad $17, %xmm0, %xmm0
 ; AVX512-NEXT:vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512-NEXT:vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512-NEXT:vpmovdw %ymm0, %xmm0
-; AVX512-NEXT:vzeroupper
+; AVX512-NEXT:vpackssdw %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:retq
   %1 = ashr <4 x i32> %a0, 
   %2 = and  <4 x i32> %a1, 
@@ -80,9 +78,7 @@ define <8 x i16> @trunc_concat_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) nounwind {
 ; AVX512:   # %bb.0:
 ; AVX512-NEXT:vpsrld $17, %xmm0, %xmm0
 ; AVX512-NEXT:vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
-; AVX512-NEXT:vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512-NEXT:vpmovdw %ymm0, %xmm0
-; AVX512-NEXT:vzeroupper
+; AVX512-NEXT:vpackusdw %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:retq
   %1 = lshr <4 x i32> %a0, 
   %2 = and  <4 x i32> %a1, 
@@ -99,38 +95,12 @@ define <16 x i8> @trunc_concat_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwind {
 ; SSE-NEXT:packsswb %xmm1, %xmm0
 ; SSE-NEXT:retq
 ;
-; AVX1-LABEL: trunc_concat_packsswb_128:
-; AVX1:   # %bb.0:
-; AVX1-NEXT:vpsraw $15, %xmm0, %xmm0
-; AVX1-NEXT:vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT:vpacksswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:retq
-;
-; AVX2-LABEL: trunc_concat_packsswb_128:
-; AVX2:   # %bb.0:
-; AVX2-NEXT:vpsraw $15, %xmm0, %xmm0
-; AVX2-NEXT:vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT:vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:retq
-;
-; AVX512F-LABEL: trunc_concat_packsswb_128:
-; AVX512F:   # %bb.0:
-; AVX512F-NEXT:vpsraw $15, %xmm0, %xmm0
-; AVX512F-NEXT:vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT:vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT:vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512F-NEXT:vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT:vzeroupper
-; AVX512F-NEXT:retq
-;
-; AVX512BW-LABEL: trunc_concat_packsswb_128:
-; AVX512BW:   # %bb.0:
-; AVX512BW-NEXT:vpsraw $15, %xmm0, %xmm0
-; AVX512BW-NEXT:vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT:vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT:vpmovwb %ymm0, %xmm0
-; AVX512BW-NEXT:vzeroupper
-; AVX512BW-NEXT:retq
+; AVX-LABEL: trunc_concat_packsswb_128:
+; AVX:   # %bb.0:
+; AVX-NEXT:vpsraw $15, %xmm0, %xmm0
+; AVX-NEXT:vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:vpacksswb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:retq
   %1 = ashr <8 x i16> %a0, 
   %2 = and  <8

[llvm-branch-commits] [clang] 55488bd - CGExpr - EmitMatrixSubscriptExpr - fix getAs<> null-dereference static analyzer warning. NFCI.

2021-01-05 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-05T17:08:11Z
New Revision: 55488bd3cd1a468941e26ad4cf94f2bad887fc02

URL: 
https://github.com/llvm/llvm-project/commit/55488bd3cd1a468941e26ad4cf94f2bad887fc02
DIFF: 
https://github.com/llvm/llvm-project/commit/55488bd3cd1a468941e26ad4cf94f2bad887fc02.diff

LOG: CGExpr - EmitMatrixSubscriptExpr - fix getAs<> null-dereference static 
analyzer warning. NFCI.

getAs<> can return null if the cast is invalid, which can lead to null pointer
dereferences. Use castAs<> instead, which will assert that the cast is valid.
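
The same contract exists at the QualType level (a sketch using the matrix
type from the change below):

  // getAs<T>() returns nullptr when the type is not a T; castAs<T>()
  // asserts that it is, so the result can be dereferenced directly.
  const auto *MTy = E->getBase()->getType()->castAs<ConstantMatrixType>();
  unsigned NumRows = MTy->getNumRows(); // safe: castAs<> asserted success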

Added: 


Modified: 
clang/lib/CodeGen/CGExpr.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 3013fffcbf6d..a3f90449bb4c 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3858,7 +3858,7 @@ LValue CodeGenFunction::EmitMatrixSubscriptExpr(const 
MatrixSubscriptExpr *E) {
   llvm::Value *ColIdx = EmitScalarExpr(E->getColumnIdx());
   llvm::Value *NumRows = Builder.getIntN(
   RowIdx->getType()->getScalarSizeInBits(),
-  E->getBase()->getType()->getAs<ConstantMatrixType>()->getNumRows());
+  E->getBase()->getType()->castAs<ConstantMatrixType>()->getNumRows());
   llvm::Value *FinalIdx =
   Builder.CreateAdd(Builder.CreateMul(ColIdx, NumRows), RowIdx);
   return LValue::MakeMatrixElt(



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] dfcb872 - [X86] Add scalar/vector test coverage for D93599

2021-01-06 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-06T11:58:27Z
New Revision: dfcb872c3e82c821bb32a2dd53ab73314d38ce38

URL: 
https://github.com/llvm/llvm-project/commit/dfcb872c3e82c821bb32a2dd53ab73314d38ce38
DIFF: 
https://github.com/llvm/llvm-project/commit/dfcb872c3e82c821bb32a2dd53ab73314d38ce38.diff

LOG: [X86] Add scalar/vector test coverage for D93599

This expands the test coverage beyond just the bool-vector/movmsk concat pattern.
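
The identities the tests exercise (a C++ model of the scalar cases; the
vector cases apply the same reasoning per element):

  #include <cstdint>

  // concat(x,y) != 0  <=>  (x | y) != 0, avoiding the shift+or.
  constexpr bool anyBitsConcat(uint32_t X, uint32_t Y) {
    return (((uint64_t)X << 32) | Y) != 0;
  }
  constexpr bool anyBitsFolded(uint32_t X, uint32_t Y) { return (X | Y) != 0; }
  static_assert(anyBitsConcat(0, 0) == anyBitsFolded(0, 0), "");
  static_assert(anyBitsConcat(1, 0) == anyBitsFolded(1, 0), "");
  static_assert(anyBitsConcat(0, 1) == anyBitsFolded(0, 1), "");

The all-bits form is analogous: concat(x,y) == -1  <=>  (x & y) == -1.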

Added: 
llvm/test/CodeGen/X86/cmp-concat.ll

Modified: 


Removed: 




diff  --git a/llvm/test/CodeGen/X86/cmp-concat.ll 
b/llvm/test/CodeGen/X86/cmp-concat.ll
new file mode 100644
index ..a622ad7faff7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/cmp-concat.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s
+
+define i1 @cmp_allbits_concat_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: cmp_allbits_concat_i8:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:movzbl %sil, %eax
+; CHECK-NEXT:shll $8, %edi
+; CHECK-NEXT:orl %eax, %edi
+; CHECK-NEXT:cmpw $-1, %di
+; CHECK-NEXT:sete %al
+; CHECK-NEXT:retq
+  %zx = zext i8 %x to i16
+  %zy = zext i8 %y to i16
+  %sh = shl i16 %zx, 8
+  %or = or i16 %zy, %sh
+  %r = icmp eq i16 %or, -1
+  ret i1 %r
+}
+
+define i1 @cmp_anybits_concat_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: cmp_anybits_concat_i32:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:# kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:movl %esi, %eax
+; CHECK-NEXT:shlq $32, %rdi
+; CHECK-NEXT:orq %rax, %rdi
+; CHECK-NEXT:setne %al
+; CHECK-NEXT:retq
+  %zx = zext i32 %x to i64
+  %zy = zext i32 %y to i64
+  %sh = shl i64 %zx, 32
+  %or = or i64 %zy, %sh
+  %r = icmp ne i64 %or, 0
+  ret i1 %r
+}
+
+define <16 x i8> @cmp_allbits_concat_v16i8(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: cmp_allbits_concat_v16i8:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:movdqa %xmm1, %xmm2
+; CHECK-NEXT:punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; CHECK-NEXT:punpckhbw {{.*#+}} xmm1 = 
xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; CHECK-NEXT:pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:pcmpeqw %xmm0, %xmm1
+; CHECK-NEXT:pcmpeqw %xmm2, %xmm0
+; CHECK-NEXT:packsswb %xmm1, %xmm0
+; CHECK-NEXT:retq
+  %zx = zext <16 x i8> %x to <16 x i16>
+  %zy = zext <16 x i8> %y to <16 x i16>
+  %sh = shl <16 x i16> %zx, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %or = or <16 x i16> %zy, %sh
+  %r = icmp eq <16 x i16> %or, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %s = sext <16 x i1> %r to <16 x i8>
+  ret <16 x i8> %s
+}
+
+define <2 x i64> @cmp_nobits_concat_v2i64(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: cmp_nobits_concat_v2i64:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:movq %xmm0, %rax
+; CHECK-NEXT:pextrq $1, %xmm0, %rcx
+; CHECK-NEXT:movq %xmm1, %rdx
+; CHECK-NEXT:pextrq $1, %xmm1, %rsi
+; CHECK-NEXT:xorl %edi, %edi
+; CHECK-NEXT:orq %rcx, %rsi
+; CHECK-NEXT:sete %dil
+; CHECK-NEXT:negq %rdi
+; CHECK-NEXT:movq %rdi, %xmm1
+; CHECK-NEXT:xorl %ecx, %ecx
+; CHECK-NEXT:orq %rax, %rdx
+; CHECK-NEXT:sete %cl
+; CHECK-NEXT:negq %rcx
+; CHECK-NEXT:movq %rcx, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:retq
+  %zx = zext <2 x i64> %x to <2 x i128>
+  %zy = zext <2 x i64> %y to <2 x i128>
+  %sh = shl <2 x i128> %zx, <i128 64, i128 64>
+  %or = or <2 x i128> %zy, %sh
+  %r = icmp eq <2 x i128> %or, zeroinitializer
+  %s = sext <2 x i1> %r to <2 x i64>
+  ret <2 x i64> %s
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 37ac4f8 - [Hexagon] Regenerate zext-v4i1.ll tests

2021-01-06 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-06T12:56:06Z
New Revision: 37ac4f865fba451d969bd9b4b1e28ce296e093da

URL: 
https://github.com/llvm/llvm-project/commit/37ac4f865fba451d969bd9b4b1e28ce296e093da
DIFF: 
https://github.com/llvm/llvm-project/commit/37ac4f865fba451d969bd9b4b1e28ce296e093da.diff

LOG: [Hexagon] Regenerate zext-v4i1.ll tests

This will be improved by part of the work for D86578

Added: 


Modified: 
llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll

Removed: 




diff  --git a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll 
b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll
index e5394d929bb1..5f9a1522a2f6 100644
--- a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll
+++ b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll
@@ -1,12 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=hexagon -hexagon-instsimplify=0 < %s | FileCheck %s
 
 ; Check that this compiles successfully.
-; CHECK: vcmph.eq
 
 target datalayout = 
"e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
 target triple = "hexagon"
 
 define i32 @fred(<8 x i16>* %a0) #0 {
+; CHECK-LABEL: fred:
+; CHECK:   // %bb.0: // %b0
+; CHECK-NEXT:{
+; CHECK-NEXT: if (p0) jump:nt .LBB0_2
+; CHECK-NEXT:}
+; CHECK-NEXT:  // %bb.1: // %b2
+; CHECK-NEXT:{
+; CHECK-NEXT: r3:2 = combine(#0,#0)
+; CHECK-NEXT: r1:0 = memd(r0+#0)
+; CHECK-NEXT:}
+; CHECK-NEXT:{
+; CHECK-NEXT: p0 = vcmph.eq(r1:0,r3:2)
+; CHECK-NEXT:}
+; CHECK-NEXT:{
+; CHECK-NEXT: r1:0 = mask(p0)
+; CHECK-NEXT:}
+; CHECK-NEXT:{
+; CHECK-NEXT: r0 = and(r0,#1)
+; CHECK-NEXT:}
+; CHECK-NEXT:{
+; CHECK-NEXT: p0 = cmp.eq(r0,#11)
+; CHECK-NEXT: r0 = #1
+; CHECK-NEXT:}
+; CHECK-NEXT:{
+; CHECK-NEXT: if (p0) r0 = #0
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT:}
+; CHECK-NEXT:  .LBB0_2: // %b14
+; CHECK-NEXT:{
+; CHECK-NEXT: r0 = #0
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT:}
 b0:
   switch i32 undef, label %b14 [
 i32 5, label %b2



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 396dd6c - [ProfileData] Pass Twine by const reference instead of by value.

2021-01-06 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-06T14:22:03Z
New Revision: 396dd6cd3d8bdcda9dcb606ad4c054560bf0649f

URL: 
https://github.com/llvm/llvm-project/commit/396dd6cd3d8bdcda9dcb606ad4c054560bf0649f
DIFF: 
https://github.com/llvm/llvm-project/commit/396dd6cd3d8bdcda9dcb606ad4c054560bf0649f.diff

LOG: [ProfileData] Pass Twine by const reference instead of by value.

It's only used by DiagnosticInfoSampleProfile, which takes a const reference
anyhow.
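
For context, the convention this follows (a sketch, not part of the patch;
Line and Offset are stand-ins): Twine is a lightweight proxy for deferred
string concatenation and may reference temporaries, so it is always passed
as const Twine & and never stored or copied by value.

  void reportError(int64_t LineNumber, const Twine &Msg); // idiomatic form

  // The temporary concatenation tree lives for the duration of the call:
  reportError(Line, "invalid entry at offset " + Twine(Offset));

  // To keep the text beyond the call, materialize it first:
  std::string Saved = ("invalid entry at offset " + Twine(Offset)).str();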

Added: 


Modified: 
llvm/include/llvm/ProfileData/SampleProfReader.h

Removed: 




diff  --git a/llvm/include/llvm/ProfileData/SampleProfReader.h 
b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 35e71f336c27..92fe825beefc 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -226,7 +226,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
@@ -247,6 +246,7 @@
 namespace llvm {
 
 class raw_ostream;
+class Twine;
 
 namespace sampleprof {
 
@@ -408,7 +408,7 @@ class SampleProfileReader {
   StringMap<FunctionSamples> &getProfiles() { return Profiles; }
 
   /// Report a parse error message.
-  void reportError(int64_t LineNumber, Twine Msg) const {
+  void reportError(int64_t LineNumber, const Twine &Msg) const {
 Ctx.diagnose(DiagnosticInfoSampleProfile(Buffer->getBufferIdentifier(),
  LineNumber, Msg));
   }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] df5c2ca - [MIPS] MipsAsmParser - Pass Twine by const reference instead of by value. NFCI.

2021-01-06 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-06T14:22:04Z
New Revision: df5c2caf0fc0d59d4d2e0ce99da4aa58f204791a

URL: 
https://github.com/llvm/llvm-project/commit/df5c2caf0fc0d59d4d2e0ce99da4aa58f204791a
DIFF: 
https://github.com/llvm/llvm-project/commit/df5c2caf0fc0d59d4d2e0ce99da4aa58f204791a.diff

LOG: [MIPS] MipsAsmParser - Pass Twine by const reference instead of by value. 
NFCI.

Added: 


Modified: 
llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp

Removed: 




diff  --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp 
b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 9dbbdeb34dba..e4d61f8c210e 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -352,8 +352,8 @@ class MipsAsmParser : public MCTargetAsmParser {
   bool expandSaaAddr(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
  const MCSubtargetInfo *STI);
 
-  bool reportParseError(Twine ErrorMsg);
-  bool reportParseError(SMLoc Loc, Twine ErrorMsg);
+  bool reportParseError(const Twine &ErrorMsg);
+  bool reportParseError(SMLoc Loc, const Twine &ErrorMsg);
 
   bool parseMemOffset(const MCExpr *&Res, bool isParenExpr);
 
@@ -6982,12 +6982,12 @@ bool 
MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 
 // FIXME: Given that these have the same name, these should both be
 // consistent on affecting the Parser.
-bool MipsAsmParser::reportParseError(Twine ErrorMsg) {
+bool MipsAsmParser::reportParseError(const Twine &ErrorMsg) {
   SMLoc Loc = getLexer().getLoc();
   return Error(Loc, ErrorMsg);
 }
 
-bool MipsAsmParser::reportParseError(SMLoc Loc, Twine ErrorMsg) {
+bool MipsAsmParser::reportParseError(SMLoc Loc, const Twine &ErrorMsg) {
   return Error(Loc, ErrorMsg);
 }
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 26c486c - [TableGen] RegisterBankEmitter - Pass Twine by const reference instead of by value. NFCI.

2021-01-06 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-06T14:22:05Z
New Revision: 26c486c2eb1a0f302eb60a4b959456f09adbbacb

URL: 
https://github.com/llvm/llvm-project/commit/26c486c2eb1a0f302eb60a4b959456f09adbbacb
DIFF: 
https://github.com/llvm/llvm-project/commit/26c486c2eb1a0f302eb60a4b959456f09adbbacb.diff

LOG: [TableGen] RegisterBankEmitter - Pass Twine by const reference instead of 
by value. NFCI.

Added: 


Modified: 
llvm/utils/TableGen/RegisterBankEmitter.cpp

Removed: 




diff  --git a/llvm/utils/TableGen/RegisterBankEmitter.cpp 
b/llvm/utils/TableGen/RegisterBankEmitter.cpp
index 6a45213e1d66..0725657150f8 100644
--- a/llvm/utils/TableGen/RegisterBankEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterBankEmitter.cpp
@@ -168,7 +168,7 @@ void RegisterBankEmitter::emitBaseClassDefinition(
 ///to the class.
 static void visitRegisterBankClasses(
 const CodeGenRegBank &RegisterClassHierarchy,
-const CodeGenRegisterClass *RC, const Twine Kind,
+const CodeGenRegisterClass *RC, const Twine &Kind,
 std::function<void(const CodeGenRegisterClass *, StringRef)> VisitFn,
 SmallPtrSetImpl<const CodeGenRegisterClass *> &VisitedRCs) {
 
@@ -182,7 +182,7 @@ static void visitRegisterBankClasses(
 
   for (const auto &PossibleSubclass : RegisterClassHierarchy.getRegClasses()) {
 std::string TmpKind =
-(Twine(Kind) + " (" + PossibleSubclass.getName() + ")").str();
+(Kind + " (" + PossibleSubclass.getName() + ")").str();
 
 // Visit each subclass of an explicitly named class.
 if (RC != &PossibleSubclass && RC->hasSubClass(&PossibleSubclass))



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] b69fe6a - [X86] Add icmp ne/eq (srl (ctlz x), log2(bw)) test coverage.

2021-01-06 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-06T15:50:29Z
New Revision: b69fe6a85db43df27ebb260716d41a3e1b0d7534

URL: 
https://github.com/llvm/llvm-project/commit/b69fe6a85db43df27ebb260716d41a3e1b0d7534
DIFF: 
https://github.com/llvm/llvm-project/commit/b69fe6a85db43df27ebb260716d41a3e1b0d7534.diff

LOG: [X86] Add icmp ne/eq (srl (ctlz x), log2(bw)) test coverage.

Add vector coverage as well (which isn't currently supported).

Added: 
llvm/test/CodeGen/X86/lzcnt-cmp.ll

Modified: 


Removed: 




diff  --git a/llvm/test/CodeGen/X86/lzcnt-cmp.ll 
b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
new file mode 100644
index ..435b09dd5d08
--- /dev/null
+++ b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- -mattr=+lzcnt | FileCheck %s 
--check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+lzcnt | FileCheck %s 
--check-prefix=X64
+
+define i1 @lshr_ctlz_cmpeq_one_i64(i64 %in) {
+; X86-LABEL: lshr_ctlz_cmpeq_one_i64:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:sete %al
+; X86-NEXT:retl
+;
+; X64-LABEL: lshr_ctlz_cmpeq_one_i64:
+; X64:   # %bb.0:
+; X64-NEXT:testq %rdi, %rdi
+; X64-NEXT:sete %al
+; X64-NEXT:retq
+  %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 0)
+  %lshr = lshr i64 %ctlz, 6
+  %icmp = icmp eq i64 %lshr, 1
+  ret i1 %icmp
+}
+
+define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) {
+; X86-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
+; X86:   # %bb.0:
+; X86-NEXT:xorl %eax, %eax
+; X86-NEXT:cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:jne .LBB1_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:lzcntl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:addl $32, %eax
+; X86-NEXT:  .LBB1_2:
+; X86-NEXT:testb $64, %al
+; X86-NEXT:setne %al
+; X86-NEXT:retl
+;
+; X64-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
+; X64:   # %bb.0:
+; X64-NEXT:lzcntq %rdi, %rax
+; X64-NEXT:shrq $6, %rax
+; X64-NEXT:cmpl $1, %eax
+; X64-NEXT:sete %al
+; X64-NEXT:retq
+  %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1)
+  %lshr = lshr i64 %ctlz, 6
+  %icmp = icmp eq i64 %lshr, 1
+  ret i1 %icmp
+}
+
+define i1 @lshr_ctlz_cmpne_zero_i64(i64 %in) {
+; X86-LABEL: lshr_ctlz_cmpne_zero_i64:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:sete %al
+; X86-NEXT:retl
+;
+; X64-LABEL: lshr_ctlz_cmpne_zero_i64:
+; X64:   # %bb.0:
+; X64-NEXT:testq %rdi, %rdi
+; X64-NEXT:sete %al
+; X64-NEXT:retq
+  %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 0)
+  %lshr = lshr i64 %ctlz, 6
+  %icmp = icmp ne i64 %lshr, 0
+  ret i1 %icmp
+}
+
+define i1 @lshr_ctlz_undef_cmpne_zero_i64(i64 %in) {
+; X86-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
+; X86:   # %bb.0:
+; X86-NEXT:xorl %eax, %eax
+; X86-NEXT:cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:jne .LBB3_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:lzcntl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:addl $32, %eax
+; X86-NEXT:  .LBB3_2:
+; X86-NEXT:testb $64, %al
+; X86-NEXT:setne %al
+; X86-NEXT:retl
+;
+; X64-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
+; X64:   # %bb.0:
+; X64-NEXT:lzcntq %rdi, %rax
+; X64-NEXT:testb $64, %al
+; X64-NEXT:setne %al
+; X64-NEXT:retq
+  %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1)
+  %lshr = lshr i64 %ctlz, 6
+  %icmp = icmp ne i64 %lshr, 0
+  ret i1 %icmp
+}
+
+define <2 x i64> @lshr_ctlz_cmpeq_zero_v2i64(<2 x i64> %in) {
+; X86-LABEL: lshr_ctlz_cmpeq_zero_v2i64:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:xorl %edx, %edx
+; X86-NEXT:cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:movl $0, %ecx
+; X86-NEXT:jne .LBB4_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:lzcntl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:addl $32, %ecx
+; X86-NEXT:  .LBB4_2:
+; X86-NEXT:cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:jne .LBB4_4
+; X86-NEXT:  # %bb.3:
+; X86-NEXT:lzcntl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:addl $32, %edx
+; X86-NEXT:  .LBB4_4:
+; X86-NEXT:andl $-64, %edx
+; X86-NEXT:cmpl $1, %edx
+; X86-NEXT:sbbl %edx, %edx
+; X86-NEXT:andl $-64, %ecx
+; X86-NEXT:cmpl $1, %ecx
+; X86-NEXT:sbbl %ecx, %ecx
+; X86-NEXT:movl %ecx, 12(%eax)
+; X86-NEXT:movl %ecx, 8(%eax)
+; X86-NEXT:movl %edx, 4(%eax)
+; X86-NEXT:movl %edx, (%eax)
+; X86-NEXT:retl $4
+;
+; X64-LABEL: lshr_ctlz_cmpeq_zero_v2i64:
+; X64:   # %bb.0:
+; X64-NEXT:movdqa %xmm0, %xmm1
+; X64-NEXT:psrlq $1, %xmm1
+; X64-NEXT:por %xmm0, %xmm1
+; X64-NEXT:movdqa %xmm1, %xmm0
+; X64-NEXT:psrlq $2, %xmm0
+; X64-NEXT:por %xmm1, %xmm0
+; X64-NEXT:movdqa %xmm0, %xmm1
+; X64-NEXT:psrlq $4, %xmm1
+; X64-NEXT:por %xmm0, %xmm1
+; X64-NEXT:movdqa %xmm1, %xmm0
+; X64-NEXT:ps

[llvm-branch-commits] [llvm] 500864f - Remove some unused <vector> includes. NFCI.

2021-01-06 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-06T15:50:29Z
New Revision: 500864f928c272e8ebfd6493cb749083124bfd8b

URL: 
https://github.com/llvm/llvm-project/commit/500864f928c272e8ebfd6493cb749083124bfd8b
DIFF: 
https://github.com/llvm/llvm-project/commit/500864f928c272e8ebfd6493cb749083124bfd8b.diff

LOG: Remove some unused <vector> includes. NFCI.

<vector> (unlike many other C++ headers) is relatively clean, so if the file
doesn't use std::vector then it shouldn't need the header.

Added: 


Modified: 
llvm/include/llvm/Analysis/InlineAdvisor.h
llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
llvm/include/llvm/ExecutionEngine/JITEventListener.h

Removed: 




diff  --git a/llvm/include/llvm/Analysis/InlineAdvisor.h 
b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 4dbd5786ac7d..f051706dca16 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -9,13 +9,11 @@
 #ifndef LLVM_INLINEADVISOR_H_
 #define LLVM_INLINEADVISOR_H_
 
-#include <memory>
-#include <string>
-#include <vector>
-
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/PassManager.h"
+#include <memory>
+#include <string>
 
 namespace llvm {
 class BasicBlock;

diff  --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h 
b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
index b47aaa53eb89..893bc6e013f4 100644
--- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -57,7 +57,6 @@
 #include <string>
 #include <type_traits>
 #include <utility>
-#include <vector>
 
 namespace llvm {
 

diff  --git a/llvm/include/llvm/ExecutionEngine/JITEventListener.h 
b/llvm/include/llvm/ExecutionEngine/JITEventListener.h
index 606b6f7cc128..4eefd993de2b 100644
--- a/llvm/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/llvm/include/llvm/ExecutionEngine/JITEventListener.h
@@ -20,7 +20,6 @@
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Support/CBindingWrapping.h"
 #include <cstdint>
-#include <vector>
 
 namespace llvm {
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 1307e3f - [TargetLowering] Add icmp ne/eq (srl (ctlz x), log2(bw)) vector support.

2021-01-06 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-06T16:13:51Z
New Revision: 1307e3f6c46cc3a6e6ad9cd46fc67efafcac939e

URL: 
https://github.com/llvm/llvm-project/commit/1307e3f6c46cc3a6e6ad9cd46fc67efafcac939e
DIFF: 
https://github.com/llvm/llvm-project/commit/1307e3f6c46cc3a6e6ad9cd46fc67efafcac939e.diff

LOG: [TargetLowering] Add icmp ne/eq (srl (ctlz x), log2(bw)) vector support.
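
The scalar reasoning behind the fold, now applied per element for splat
shift amounts (a C++20 model, not the DAG code): for a power-of-two bit
width BW, ctlz(x) lies in [0, BW] and equals BW only for x == 0, so
ctlz(x) >> log2(BW) is 1 exactly when x is zero.

  #include <bit>
  #include <cstdint>

  // Model with BW = 32, log2(BW) = 5.
  constexpr unsigned srlCtlz(uint32_t X) {
    return std::countl_zero(X) >> 5; // 1 iff X == 0, else 0
  }
  static_assert(srlCtlz(0) == 1);
  static_assert(srlCtlz(1) == 0);           // ctlz = 31
  static_assert(srlCtlz(0x80000000u) == 0); // ctlz = 0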

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/X86/lzcnt-cmp.ll

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d895a53e5a83..f5abb2c513fb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3486,35 +3486,36 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue 
N0, SDValue N1,
 // Optimize some CTPOP cases.
 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
   return V;
-  }
-
-  // FIXME: Support vectors.
-  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
-const APInt &C1 = N1C->getAPIntValue();
 
 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
 // equality comparison, then we're just comparing whether X itself is
 // zero.
 if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
-N0.getOperand(1).getOpcode() == ISD::Constant) {
-  const APInt &ShAmt = N0.getConstantOperandAPInt(1);
-  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
-  ShAmt == Log2_32(N0.getValueSizeInBits())) {
-if ((C1 == 0) == (Cond == ISD::SETEQ)) {
-  // (srl (ctlz x), 5) == 0  -> X != 0
-  // (srl (ctlz x), 5) != 1  -> X != 0
-  Cond = ISD::SETNE;
-} else {
-  // (srl (ctlz x), 5) != 0  -> X == 0
-  // (srl (ctlz x), 5) == 1  -> X == 0
-  Cond = ISD::SETEQ;
+isPowerOf2_32(N0.getScalarValueSizeInBits())) {
+  if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
+if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
+  if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+// (srl (ctlz x), 5) == 0  -> X != 0
+// (srl (ctlz x), 5) != 1  -> X != 0
+Cond = ISD::SETNE;
+  } else {
+// (srl (ctlz x), 5) != 0  -> X == 0
+// (srl (ctlz x), 5) == 1  -> X == 0
+Cond = ISD::SETEQ;
+  }
+  SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
+  return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
+  Cond);
 }
-SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
-return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
-Zero, Cond);
   }
 }
+  }
+
+  // FIXME: Support vectors.
+  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+const APInt &C1 = N1C->getAPIntValue();
 
 // (zext x) == C --> x == (trunc C)
 // (sext x) == C --> x == (trunc C)

diff  --git a/llvm/test/CodeGen/X86/lzcnt-cmp.ll 
b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
index 435b09dd5d08..3823524f552a 100644
--- a/llvm/test/CodeGen/X86/lzcnt-cmp.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
@@ -96,75 +96,36 @@ define i1 @lshr_ctlz_undef_cmpne_zero_i64(i64 %in) {
 define <2 x i64> @lshr_ctlz_cmpeq_zero_v2i64(<2 x i64> %in) {
 ; X86-LABEL: lshr_ctlz_cmpeq_zero_v2i64:
 ; X86:   # %bb.0:
+; X86-NEXT:pushl %esi
+; X86-NEXT:.cfi_def_cfa_offset 8
+; X86-NEXT:.cfi_offset %esi, -8
 ; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:xorl %ecx, %ecx
+; X86-NEXT:orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:setne %cl
+; X86-NEXT:negl %ecx
 ; X86-NEXT:xorl %edx, %edx
-; X86-NEXT:cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT:movl $0, %ecx
-; X86-NEXT:jne .LBB4_2
-; X86-NEXT:  # %bb.1:
-; X86-NEXT:lzcntl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:addl $32, %ecx
-; X86-NEXT:  .LBB4_2:
-; X86-NEXT:cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT:jne .LBB4_4
-; X86-NEXT:  # %bb.3:
-; X86-NEXT:lzcntl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:addl $32, %edx
-; X86-NEXT:  .LBB4_4:
-; X86-NEXT:andl $-64, %edx
-; X86-NEXT:cmpl $1, %edx
-; X86-NEXT:sbbl %edx, %edx
-; X86-NEXT:andl $-64, %ecx
-; X86-NEXT:cmpl $1, %ecx
-; X86-NEXT:sbbl %ecx, %ecx
-; X86-NEXT:movl %ecx, 12(%eax)
-; X86-NEXT:movl %ecx, 8(%eax)
-; X86-NEXT:movl %edx, 4(%eax)
-; X86-NEXT:movl %edx, (%eax)
+; X86-NEXT:orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:setne %dl
+; X86-NEXT:negl %edx
+; X86-NEXT:movl %edx, 12(%eax)
+; X86-NEXT:movl %edx, 8(%eax)
+; X86

[llvm-branch-commits] [llvm] 3f8c252 - [X86] Add commuted patterns test coverage for D93599

2021-01-06 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-01-06T18:03:20Z
New Revision: 3f8c2520c0424860b4bd3ae7b20f8033ed09363a

URL: 
https://github.com/llvm/llvm-project/commit/3f8c2520c0424860b4bd3ae7b20f8033ed09363a
DIFF: 
https://github.com/llvm/llvm-project/commit/3f8c2520c0424860b4bd3ae7b20f8033ed09363a.diff

LOG: [X86] Add commuted patterns test coverage for D93599

Suggested by @spatel

Added: 


Modified: 
llvm/test/CodeGen/X86/cmp-concat.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/cmp-concat.ll 
b/llvm/test/CodeGen/X86/cmp-concat.ll
index a622ad7faff7..e3a69df86563 100644
--- a/llvm/test/CodeGen/X86/cmp-concat.ll
+++ b/llvm/test/CodeGen/X86/cmp-concat.ll
@@ -35,6 +35,46 @@ define i1 @cmp_anybits_concat_i32(i32 %x, i32 %y) {
   ret i1 %r
 }
 
+define i1 @cmp_anybits_concat_shl_shl_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: cmp_anybits_concat_shl_shl_i16:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:# kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:movzwl %di, %eax
+; CHECK-NEXT:movzwl %si, %ecx
+; CHECK-NEXT:shlq $32, %rax
+; CHECK-NEXT:shlq $8, %rcx
+; CHECK-NEXT:orq %rax, %rcx
+; CHECK-NEXT:sete %al
+; CHECK-NEXT:retq
+  %zx = zext i16 %x to i64
+  %zy = zext i16 %y to i64
+  %sx = shl i64 %zx, 32
+  %sy = shl i64 %zy, 8
+  %or = or i64 %sx, %sy
+  %r = icmp eq i64 %or, 0
+  ret i1 %r
+}
+
+define i1 @cmp_anybits_concat_shl_shl_i16_commute(i16 %x, i16 %y) {
+; CHECK-LABEL: cmp_anybits_concat_shl_shl_i16_commute:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:# kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:movzwl %di, %eax
+; CHECK-NEXT:movzwl %si, %ecx
+; CHECK-NEXT:shlq $32, %rax
+; CHECK-NEXT:shlq $8, %rcx
+; CHECK-NEXT:orq %rax, %rcx
+; CHECK-NEXT:sete %al
+; CHECK-NEXT:retq
+  %zx = zext i16 %x to i64
+  %zy = zext i16 %y to i64
+  %sx = shl i64 %zx, 32
+  %sy = shl i64 %zy, 8
+  %or = or i64 %sy, %sx
+  %r = icmp eq i64 %or, 0
+  ret i1 %r
+}
+
 define <16 x i8> @cmp_allbits_concat_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK-LABEL: cmp_allbits_concat_v16i8:
 ; CHECK:   # %bb.0:



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

