Author: Anton Afanasyev
Date: 2020-12-08T12:00:52+03:00
New Revision: 6c3f56efa6e6ca746ba3dafae43251105f16e5fb
URL: https://github.com/llvm/llvm-project/commit/6c3f56efa6e6ca746ba3dafae43251105f16e5fb
DIFF: https://github.com/llvm/llvm-project/commit/6c3f56efa6e6ca746ba3dafae43251105f16e5fb.diff

LOG: [SLP][Test] Differentiate SSE/AVX512 test coverage (NFC)

Add SSE/AVX512 test coverage for the insert-after-bundle.ll test.
This prepares the test to accurately show the fix for PR46983.

Added: 

Modified: 
    llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll

Removed: 


################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
index 2a4d457f1063..fa1183400cb0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -slp-vectorizer < %s | FileCheck %s
+; RUN: opt -S -slp-vectorizer -mattr=+sse < %s | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt -S -slp-vectorizer -mattr=+avx512f < %s | FileCheck %s --check-prefixes=CHECK,AVX512

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -410,75 +411,109 @@ for.end:                                          ; preds = %for.body
 @ia = common local_unnamed_addr global [64 x i32] zeroinitializer, align 16

 define i32 @foo1() local_unnamed_addr #0 {
-; CHECK-LABEL: @foo1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([64 x i32]* @ib to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([64 x i32]* @ia to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP13]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP16:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP17]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP19]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP21]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP23]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP25]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP27]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP29]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60) to <4 x i32>*), align 16
-; CHECK-NEXT:    [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    store <4 x i32> [[TMP31]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60) to <4 x i32>*), align 16
-; CHECK-NEXT:    br label [[FOR_BODY5:%.*]]
-; CHECK:       for.cond3:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
-; CHECK-NEXT:    [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
-; CHECK-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
-; CHECK:       for.body5:
-; CHECK-NEXT:    [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
-; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
-; CHECK-NEXT:    [[NEG10:%.*]] = xor i32 [[TMP33]], -1
-; CHECK-NEXT:    [[CMP11:%.*]] = icmp eq i32 [[TMP32]], [[NEG10]]
-; CHECK-NEXT:    br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    tail call void @abort()
-; CHECK-NEXT:    unreachable
-; CHECK:       for.end14:
-; CHECK-NEXT:    ret i32 0
+; SSE-LABEL: @foo1(
+; SSE-NEXT:  entry:
+; SSE-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([64 x i32]* @ib to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([64 x i32]* @ia to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP13]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP16:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP17]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP19]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP21]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP23]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP25]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP27]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP29]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60) to <4 x i32>*), align 16
+; SSE-NEXT:    [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    store <4 x i32> [[TMP31]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60) to <4 x i32>*), align 16
+; SSE-NEXT:    br label [[FOR_BODY5:%.*]]
+; SSE:       for.cond3:
+; SSE-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
+; SSE-NEXT:    [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
+; SSE-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
+; SSE:       for.body5:
+; SSE-NEXT:    [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
+; SSE-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
+; SSE-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; SSE-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
+; SSE-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
+; SSE-NEXT:    [[NEG10:%.*]] = xor i32 [[TMP33]], -1
+; SSE-NEXT:    [[CMP11:%.*]] = icmp eq i32 [[TMP32]], [[NEG10]]
+; SSE-NEXT:    br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
+; SSE:       if.then:
+; SSE-NEXT:    tail call void @abort()
+; SSE-NEXT:    unreachable
+; SSE:       for.end14:
+; SSE-NEXT:    ret i32 0
+;
+; AVX512-LABEL: @foo1(
+; AVX512-NEXT:  entry:
+; AVX512-NEXT:    [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([64 x i32]* @ib to <16 x i32>*), align 16
+; AVX512-NEXT:    [[TMP1:%.*]] = xor <16 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT:    store <16 x i32> [[TMP1]], <16 x i32>* bitcast ([64 x i32]* @ia to <16 x i32>*), align 16
+; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <16 x i32>*), align 16
+; AVX512-NEXT:    [[TMP3:%.*]] = xor <16 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <16 x i32>*), align 16
+; AVX512-NEXT:    [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <16 x i32>*), align 16
+; AVX512-NEXT:    [[TMP5:%.*]] = xor <16 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT:    store <16 x i32> [[TMP5]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <16 x i32>*), align 16
+; AVX512-NEXT:    [[TMP6:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <16 x i32>*), align 16
+; AVX512-NEXT:    [[TMP7:%.*]] = xor <16 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT:    store <16 x i32> [[TMP7]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <16 x i32>*), align 16
+; AVX512-NEXT:    br label [[FOR_BODY5:%.*]]
+; AVX512:       for.cond3:
+; AVX512-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
+; AVX512-NEXT:    [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
+; AVX512-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
+; AVX512:       for.body5:
+; AVX512-NEXT:    [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
+; AVX512-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
+; AVX512-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; AVX512-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
+; AVX512-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
+; AVX512-NEXT:    [[NEG10:%.*]] = xor i32 [[TMP9]], -1
+; AVX512-NEXT:    [[CMP11:%.*]] = icmp eq i32 [[TMP8]], [[NEG10]]
+; AVX512-NEXT:    br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
+; AVX512:       if.then:
+; AVX512-NEXT:    tail call void @abort()
+; AVX512-NEXT:    unreachable
+; AVX512:       for.end14:
+; AVX512-NEXT:    ret i32 0
 ;
 entry:
   %0 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 0), align 16
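For readers unfamiliar with autogenerated SLP tests: check blocks like the SSE/AVX512 ones above are produced by the script named in the test's NOTE line, not written by hand. A minimal sketch of regenerating them after a change, assuming a standard llvm-project checkout (the build/bin path is illustrative, not from this commit):

    # Re-run every RUN line in the test and rewrite its CHECK lines in place.
    # Point --opt-binary at your own freshly built opt.
    python llvm/utils/update_test_checks.py \
        --opt-binary=build/bin/opt \
        llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll

The script executes each RUN line and emits a single block under the shared CHECK prefix for functions where both runs produce identical IR; where they diverge, as in @foo1 above, it emits one block per prefix, which is what makes the 128-bit SSE codegen (<4 x i32> operations) and the 512-bit AVX512 codegen (<16 x i32> operations) visible side by side.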