[llvm] 6c3f56e - [SLP][Test] Differentiate SSE/AVX512 test coverage (NFC)
Anton Afanasyev via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 8 01:02:25 PST 2020
Author: Anton Afanasyev
Date: 2020-12-08T12:00:52+03:00
New Revision: 6c3f56efa6e6ca746ba3dafae43251105f16e5fb
URL: https://github.com/llvm/llvm-project/commit/6c3f56efa6e6ca746ba3dafae43251105f16e5fb
DIFF: https://github.com/llvm/llvm-project/commit/6c3f56efa6e6ca746ba3dafae43251105f16e5fb.diff
LOG: [SLP][Test] Differentiate SSE/AVX512 test coverage (NFC)
Add separate SSE and AVX512 check prefixes to the insert-after-bundle.ll test so that coverage is differentiated per target feature.
This prepares the test to accurately show the effect of the PR46983 fix.
Added:
Modified:
llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
index 2a4d457f1063..fa1183400cb0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -slp-vectorizer < %s | FileCheck %s
+; RUN: opt -S -slp-vectorizer -mattr=+sse < %s | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt -S -slp-vectorizer -mattr=+avx512f < %s | FileCheck %s --check-prefixes=CHECK,AVX512
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -410,75 +411,109 @@ for.end: ; preds = %for.body
@ia = common local_unnamed_addr global [64 x i32] zeroinitializer, align 16
define i32 @foo1() local_unnamed_addr #0 {
-; CHECK-LABEL: @foo1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([64 x i32]* @ib to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([64 x i32]* @ia to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP16:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP23]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP27]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP29]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP31]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60) to <4 x i32>*), align 16
-; CHECK-NEXT: br label [[FOR_BODY5:%.*]]
-; CHECK: for.cond3:
-; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
-; CHECK-NEXT: [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
-; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
-; CHECK: for.body5:
-; CHECK-NEXT: [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
-; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
-; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
-; CHECK-NEXT: [[NEG10:%.*]] = xor i32 [[TMP33]], -1
-; CHECK-NEXT: [[CMP11:%.*]] = icmp eq i32 [[TMP32]], [[NEG10]]
-; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: tail call void @abort()
-; CHECK-NEXT: unreachable
-; CHECK: for.end14:
-; CHECK-NEXT: ret i32 0
+; SSE-LABEL: @foo1(
+; SSE-NEXT: entry:
+; SSE-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([64 x i32]* @ib to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([64 x i32]* @ia to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP16:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP23]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP27]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP29]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP31]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60) to <4 x i32>*), align 16
+; SSE-NEXT: br label [[FOR_BODY5:%.*]]
+; SSE: for.cond3:
+; SSE-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
+; SSE-NEXT: [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
+; SSE-NEXT: br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
+; SSE: for.body5:
+; SSE-NEXT: [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
+; SSE-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
+; SSE-NEXT: [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; SSE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
+; SSE-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
+; SSE-NEXT: [[NEG10:%.*]] = xor i32 [[TMP33]], -1
+; SSE-NEXT: [[CMP11:%.*]] = icmp eq i32 [[TMP32]], [[NEG10]]
+; SSE-NEXT: br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
+; SSE: if.then:
+; SSE-NEXT: tail call void @abort()
+; SSE-NEXT: unreachable
+; SSE: for.end14:
+; SSE-NEXT: ret i32 0
+;
+; AVX512-LABEL: @foo1(
+; AVX512-NEXT: entry:
+; AVX512-NEXT: [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([64 x i32]* @ib to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP1:%.*]] = xor <16 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT: store <16 x i32> [[TMP1]], <16 x i32>* bitcast ([64 x i32]* @ia to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP3:%.*]] = xor <16 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT: store <16 x i32> [[TMP3]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP5:%.*]] = xor <16 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT: store <16 x i32> [[TMP5]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP6:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT: store <16 x i32> [[TMP7]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <16 x i32>*), align 16
+; AVX512-NEXT: br label [[FOR_BODY5:%.*]]
+; AVX512: for.cond3:
+; AVX512-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
+; AVX512-NEXT: [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
+; AVX512-NEXT: br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
+; AVX512: for.body5:
+; AVX512-NEXT: [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
+; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
+; AVX512-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; AVX512-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
+; AVX512-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
+; AVX512-NEXT: [[NEG10:%.*]] = xor i32 [[TMP9]], -1
+; AVX512-NEXT: [[CMP11:%.*]] = icmp eq i32 [[TMP8]], [[NEG10]]
+; AVX512-NEXT: br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
+; AVX512: if.then:
+; AVX512-NEXT: tail call void @abort()
+; AVX512-NEXT: unreachable
+; AVX512: for.end14:
+; AVX512-NEXT: ret i32 0
;
entry:
%0 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 0), align 16
More information about the llvm-commits mailing list