[llvm] [SLP]Try to vectorize small graph with extractelements, used in (PR #83468)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 29 11:40:52 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Alexey Bataev (alexey-bataev)
<details>
<summary>Changes</summary>
buildvector.
If the graph includes only a single "gather" node with only
extractelements/undefs, which are used only in insertelement-based
buildvector sequences, it still might be profitable to vectorize it.
We need to rely on the cost model rather than throw this graph away immediately.
---
Patch is 78.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83468.diff
6 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+14)
- (modified) llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll (+323-332)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll (+9-9)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll (+2)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll (+2)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll (+68-47)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 94b7c4952f055e..6937cae0d3e13b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8974,6 +8974,20 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
if (isFullyVectorizableTinyTree(ForReduction))
return false;
+ // Check if any of the gather node forms an insertelement buildvector
+ // somewhere.
+ if (any_of(VectorizableTree, [](const std::unique_ptr<TreeEntry> &TE) {
+ return TE->State == TreeEntry::NeedToGather &&
+ all_of(TE->Scalars, [](Value *V) {
+ return isa<ExtractElementInst, UndefValue>(V) ||
+ (!V->hasNUsesOrMore(8) &&
+ any_of(V->users(), [](User *U) {
+ return isa<InsertElementInst>(U);
+ }));
+ });
+ }))
+ return false;
+
assert(VectorizableTree.empty()
? ExternalUses.empty()
: true && "We shouldn't have any external users");
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
index e167b6a47af592..ec431a56a8609f 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
@@ -1,369 +1,360 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
-; RUN: opt -S -mtriple riscv64-unknown-linux-gnu < %s --passes=slp-vectorizer -mattr=+v -slp-threshold=-40 | FileCheck %s
+; RUN: opt -S -mtriple riscv64-unknown-linux-gnu < %s --passes=slp-vectorizer -mattr=+v -slp-threshold=-20 | FileCheck %s
define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.ptr, ptr %add.ptr64) {
; CHECK-LABEL: define i32 @test(
; CHECK-SAME: ptr [[PIX1:%.*]], ptr [[PIX2:%.*]], i64 [[IDX_EXT:%.*]], i64 [[IDX_EXT63:%.*]], ptr [[ADD_PTR:%.*]], ptr [[ADD_PTR64:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> poison, ptr [[PIX1]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x ptr> [[TMP0]], <2 x ptr> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i64> <i64 4, i64 6>
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x ptr> poison, ptr [[PIX2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x ptr> [[TMP3]], <2 x ptr> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <2 x ptr> [[TMP4]], <2 x i64> <i64 4, i64 6>
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, <2 x ptr> [[TMP4]], <2 x i64> <i64 1, i64 3>
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i64> <i64 5, i64 7>
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, <2 x ptr> [[TMP4]], <2 x i64> <i64 5, i64 7>
-; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr i8, ptr [[PIX1]], i64 2
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[PIX1]], align 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[PIX1]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i64> <i64 4, i64 6>
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[PIX2]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x ptr> [[TMP4]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, <2 x ptr> [[TMP5]], <2 x i64> <i64 4, i64 6>
+; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr i8, ptr [[PIX1]], i64 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, <2 x ptr> [[TMP5]], <2 x i64> <i64 1, i64 3>
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i64> <i64 5, i64 7>
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, <2 x ptr> [[TMP5]], <2 x i64> <i64 5, i64 7>
; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i8, ptr [[PIX2]], i64 2
+; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr i8, ptr [[PIX1]], i64 3
+; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1
+; CHECK-NEXT: [[CONV33:%.*]] = zext i8 [[TMP10]] to i32
; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, ptr [[PIX1]], i64 [[IDX_EXT]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[ADD_PTR3]], align 1
-; CHECK-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP9]] to i32
+; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ADD_PTR3]], align 1
+; CHECK-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP11]] to i32
; CHECK-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 1
; CHECK-NEXT: [[ARRAYIDX32_1:%.*]] = getelementptr i8, ptr [[ADD_PTR3]], i64 3
-; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX32_1]], align 1
-; CHECK-NEXT: [[CONV33_1:%.*]] = zext i8 [[TMP10]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX32_1]], align 1
+; CHECK-NEXT: [[CONV33_1:%.*]] = zext i8 [[TMP12]] to i32
; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]]
-; CHECK-NEXT: [[ARRAYIDX20_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 2
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> poison, ptr [[ADD_PTR_1]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x ptr> [[TMP11]], ptr [[ARRAYIDX20_2]], i32 1
-; CHECK-NEXT: [[TMP13:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP12]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
+; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 4
+; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 4
+; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i8>, ptr [[ADD_PTR_1]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = zext <2 x i8> [[TMP13]] to <2 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i8>, ptr [[ADD_PTR64_1]], align 1
+; CHECK-NEXT: [[TMP16:%.*]] = zext <2 x i8> [[TMP15]] to <2 x i32>
+; CHECK-NEXT: [[TMP17:%.*]] = sub <2 x i32> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = load <2 x i8>, ptr [[ARRAYIDX3_2]], align 1
+; CHECK-NEXT: [[TMP19:%.*]] = zext <2 x i8> [[TMP18]] to <2 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_2]], align 1
+; CHECK-NEXT: [[TMP21:%.*]] = zext <2 x i8> [[TMP20]] to <2 x i32>
+; CHECK-NEXT: [[TMP22:%.*]] = sub <2 x i32> [[TMP19]], [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = shl <2 x i32> [[TMP22]], <i32 16, i32 16>
+; CHECK-NEXT: [[TMP24:%.*]] = add <2 x i32> [[TMP23]], [[TMP17]]
+; CHECK-NEXT: [[ARRAYIDX20_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 2
; CHECK-NEXT: [[ARRAYIDX22_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x ptr> poison, ptr [[ADD_PTR64_1]], i32 0
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP15]], ptr [[ARRAYIDX22_2]], i32 1
-; CHECK-NEXT: [[TMP17:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP16]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
-; CHECK-NEXT: [[TMP18:%.*]] = zext <2 x i8> [[TMP17]] to <2 x i32>
-; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP14]], [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x ptr> [[TMP12]], <2 x ptr> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, <2 x ptr> [[TMP20]], <2 x i64> <i64 4, i64 6>
-; CHECK-NEXT: [[TMP22:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP21]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
-; CHECK-NEXT: [[TMP23:%.*]] = zext <2 x i8> [[TMP22]] to <2 x i32>
-; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x ptr> [[TMP16]], <2 x ptr> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, <2 x ptr> [[TMP24]], <2 x i64> <i64 4, i64 6>
-; CHECK-NEXT: [[TMP26:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP25]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
-; CHECK-NEXT: [[TMP27:%.*]] = zext <2 x i8> [[TMP26]] to <2 x i32>
-; CHECK-NEXT: [[TMP28:%.*]] = sub <2 x i32> [[TMP23]], [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = shl <2 x i32> [[TMP28]], <i32 16, i32 16>
-; CHECK-NEXT: [[TMP30:%.*]] = add <2 x i32> [[TMP29]], [[TMP19]]
-; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i8, <2 x ptr> [[TMP20]], <2 x i64> <i64 1, i64 3>
-; CHECK-NEXT: [[TMP32:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP31]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
+; CHECK-NEXT: [[ARRAYIDX25_2:%.*]] = getelementptr i8, ptr [[ADD_PTR_1]], i64 6
+; CHECK-NEXT: [[ARRAYIDX27_2:%.*]] = getelementptr i8, ptr [[ADD_PTR64_1]], i64 6
+; CHECK-NEXT: [[TMP25:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20_2]], align 1
+; CHECK-NEXT: [[TMP26:%.*]] = zext <2 x i8> [[TMP25]] to <2 x i32>
+; CHECK-NEXT: [[TMP27:%.*]] = load <2 x i8>, ptr [[ARRAYIDX22_2]], align 1
+; CHECK-NEXT: [[TMP28:%.*]] = zext <2 x i8> [[TMP27]] to <2 x i32>
+; CHECK-NEXT: [[TMP29:%.*]] = sub <2 x i32> [[TMP26]], [[TMP28]]
+; CHECK-NEXT: [[TMP30:%.*]] = load <2 x i8>, ptr [[ARRAYIDX25_2]], align 1
+; CHECK-NEXT: [[TMP31:%.*]] = zext <2 x i8> [[TMP30]] to <2 x i32>
+; CHECK-NEXT: [[TMP32:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_2]], align 1
; CHECK-NEXT: [[TMP33:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32>
-; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, <2 x ptr> [[TMP24]], <2 x i64> <i64 1, i64 3>
-; CHECK-NEXT: [[TMP35:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP34]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
-; CHECK-NEXT: [[TMP36:%.*]] = zext <2 x i8> [[TMP35]] to <2 x i32>
-; CHECK-NEXT: [[TMP37:%.*]] = sub <2 x i32> [[TMP33]], [[TMP36]]
-; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i8, <2 x ptr> [[TMP20]], <2 x i64> <i64 5, i64 7>
-; CHECK-NEXT: [[TMP39:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP38]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
-; CHECK-NEXT: [[TMP40:%.*]] = zext <2 x i8> [[TMP39]] to <2 x i32>
-; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i8, <2 x ptr> [[TMP24]], <2 x i64> <i64 5, i64 7>
-; CHECK-NEXT: [[TMP42:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP41]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
-; CHECK-NEXT: [[TMP43:%.*]] = zext <2 x i8> [[TMP42]] to <2 x i32>
-; CHECK-NEXT: [[TMP44:%.*]] = sub <2 x i32> [[TMP40]], [[TMP43]]
-; CHECK-NEXT: [[TMP45:%.*]] = shl <2 x i32> [[TMP44]], <i32 16, i32 16>
-; CHECK-NEXT: [[TMP46:%.*]] = add <2 x i32> [[TMP45]], [[TMP37]]
-; CHECK-NEXT: [[TMP47:%.*]] = sub <2 x i32> [[TMP30]], [[TMP46]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <2 x i32> [[TMP47]], i32 0
-; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x i32> [[TMP47]], i32 1
-; CHECK-NEXT: [[SUB59_2:%.*]] = sub i32 [[TMP48]], [[TMP49]]
-; CHECK-NEXT: [[TMP50:%.*]] = load i8, ptr null, align 1
+; CHECK-NEXT: [[TMP34:%.*]] = sub <2 x i32> [[TMP31]], [[TMP33]]
+; CHECK-NEXT: [[TMP35:%.*]] = shl <2 x i32> [[TMP34]], <i32 16, i32 16>
+; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP35]], [[TMP29]]
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i32> [[TMP24]], i32 0
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i32> [[TMP24]], i32 1
+; CHECK-NEXT: [[ADD44_2:%.*]] = add i32 [[TMP38]], [[TMP37]]
+; CHECK-NEXT: [[SUB45_2:%.*]] = sub i32 [[TMP37]], [[TMP38]]
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP36]], i32 0
+; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i32> [[TMP36]], i32 1
+; CHECK-NEXT: [[ADD46_2:%.*]] = add i32 [[TMP40]], [[TMP39]]
+; CHECK-NEXT: [[SUB47_2:%.*]] = sub i32 [[TMP39]], [[TMP40]]
+; CHECK-NEXT: [[ADD48_2:%.*]] = add i32 [[ADD46_2]], [[ADD44_2]]
+; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr null, align 1
; CHECK-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2
; CHECK-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2
-; CHECK-NEXT: [[TMP51:%.*]] = load i8, ptr null, align 1
-; CHECK-NEXT: [[TMP52:%.*]] = insertelement <2 x ptr> <ptr poison, ptr null>, ptr [[ARRAYIDX20_3]], i32 0
-; CHECK-NEXT: [[TMP53:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP52]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
-; CHECK-NEXT: [[TMP54:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32>
-; CHECK-NEXT: [[TMP55:%.*]] = insertelement <2 x ptr> <ptr poison, ptr null>, ptr [[ARRAYIDX22_3]], i32 0
-; CHECK-NEXT: [[TMP56:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP55]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
-; CHECK-NEXT: [[TMP57:%.*]] = zext <2 x i8> [[TMP56]] to <2 x i32>
-; CHECK-NEXT: [[TMP58:%.*]] = sub <2 x i32> [[TMP54]], [[TMP57]]
-; CHECK-NEXT: [[TMP59:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> <i1 true, i1 true>, i32 2)
+; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr null, align 1
+; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x ptr> <ptr poison, ptr null>, ptr [[ARRAYIDX20_3]], i32 0
+; CHECK-NEXT: [[TMP44:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP43]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
+; CHECK-NEXT: [[TMP45:%.*]] = zext <2 x i8> [[TMP44]] to <2 x i32>
+; CHECK-NEXT: [[TMP46:%.*]] = insertelement <2 x ptr> <ptr poison, ptr null>, ptr [[ARRAYIDX22_3]], i32 0
+; CHECK-NEXT: [[TMP47:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP46]], i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
+; CHECK-NEXT: [[TMP48:%.*]] = zext <2 x i8> [[TMP47]] to <2 x i32>
+; CHECK-NEXT: [[TMP49:%.*]] = sub <2 x i32> [[TMP45]], [[TMP48]]
+; CHECK-NEXT: [[TMP50:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> <i1 true, i1 true>, i32 2)
+; CHECK-NEXT: [[TMP51:%.*]] = zext <2 x i8> [[TMP50]] to <2 x i32>
+; CHECK-NEXT: [[TMP52:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> <i64 6, i64 4>), i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
+; CHECK-NEXT: [[TMP53:%.*]] = zext <2 x i8> [[TMP52]] to <2 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = sub <2 x i32> [[TMP51]], [[TMP53]]
+; CHECK-NEXT: [[TMP55:%.*]] = shl <2 x i32> [[TMP54]], <i32 16, i32 16>
+; CHECK-NEXT: [[TMP56:%.*]] = add <2 x i32> [[TMP55]], [[TMP49]]
+; CHECK-NEXT: [[TMP57:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> <i64 3, i64 1>), i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
+; CHECK-NEXT: [[TMP58:%.*]] = zext <2 x i8> [[TMP57]] to <2 x i32>
+; CHECK-NEXT: [[TMP59:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> <i64 3, i64 1>), i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
; CHECK-NEXT: [[TMP60:%.*]] = zext <2 x i8> [[TMP59]] to <2 x i32>
-; CHECK-NEXT: [[TMP61:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> <i64 6, i64 4>), i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
-; CHECK-NEXT: [[TMP62:%.*]] = zext <2 x i8> [[TMP61]] to <2 x i32>
-; CHECK-NEXT: [[TMP63:%.*]] = sub <2 x i32> [[TMP60]], [[TMP62]]
-; CHECK-NEXT: [[TMP64:%.*]] = shl <2 x i32> [[TMP63]], <i32 16, i32 16>
-; CHECK-NEXT: [[TMP65:%.*]] = add <2 x i32> [[TMP64]], [[TMP58]]
-; CHECK-NEXT: [[TMP66:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> <i64 3, i64 1>), i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
-; CHECK-NEXT: [[TMP67:%.*]] = zext <2 x i8> [[TMP66]] to <2 x i32>
-; CHECK-NEXT: [[TMP68:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> <i64 3, i64 1>), i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
-; CHECK-NEXT: [[TMP69:%.*]] = zext <2 x i8> [[TMP68]] to <2 x i32>
-; CHECK-NEXT: [[TMP70:%.*]] = sub <2 x i32> [[TMP67]], [[TMP69]]
-; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i8> poison, i8 [[TMP51]], i32 0
-; CHECK-NEXT: [[TMP72:%.*]] = insertelement <2 x i8> [[TMP71]], i8 [[TMP50]], i32 1
-; CHECK-NEXT: [[TMP73:%.*]] = zext <2 x i8> [[TMP72]] to <2 x i32>
-; CHECK-NEXT: [[TMP74:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> <i64 7, i64 5>), i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
-; CHECK-NEXT: [[TMP75:%.*]] = zext <2 x i8> [[TMP74]] to <2 x i32>
-; CHECK-NEXT: [[TMP76:%.*]] = sub <2 x i32> [[TMP73]], [[TMP75]]
-; CHECK-NEXT: [[TMP77:%.*]] = shl <2 x i32> [[TMP76]], <i32 16, i32 16>
-; CHECK-NEXT: [[TMP78:%.*]] = add <2 x i32> [[TMP77]], [[TMP70]]
-; CHECK-NEXT: [[TMP79:%.*]] = sub <2 x i32> [[TMP65]], [[TMP78]]
-; CHECK-NEXT: [[TMP80:%.*]] = shufflevector <2 x i32> [[TMP78]], <2 x i32> [[TMP46]], <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT: [[TMP81:%.*]] = shufflevector <2 x i32> [[TMP65]], <2 x i32> [[TMP30]], <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT: [[TMP82:%.*]] = add <2 x i32> [[TMP80]], [[TMP81]]
-; CHECK-NEXT: [[TMP83:%.*]] = shufflevector <2 x i32> [[TMP78]], <2 x i32> [[TMP46]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <2 x i32> [[TMP65]], <2 x i32> [[TMP30]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP85:%.*]] = add <2 x i32> [[TMP83]], [[TMP84]]
-; CHECK-NEXT: [[TMP86:%.*]] = add <2 x i32> [[TMP85]], [[TMP82]]
-; CHECK-NEXT: [[TMP87:%.*]] = sub <2 x i32> [[TMP82]], [[TMP85]]
-; CHECK-NEXT: [[TMP88:%.*]] = extractelement <2 x i32> [[TMP79]], i32 0
-; CHECK-NEXT: [[TMP89:%.*]] = extractelement <2 x i32> [[TMP79]], i32 1
-; CHECK-NEXT: [[SUB59_3:%.*]] = sub i32 [[TMP89]], [[TMP88]]
-; CHECK-NEXT: [[TMP90:%.*]] = extractelement <2 x i32> [[TMP86]], i32 0
-; CHECK-NEXT: [[TMP91:%.*]] = extractelement <2 x i32> [[TMP86]], i32 1
-; CHECK-NEXT: [[ADD94:%.*]] = add i32 [[TMP90]], [[TMP91]]
-; CHECK-NEXT: [[SUB102:%.*]] = sub i32 [[TMP91]], [[TMP90]]
-; CHECK-NEXT: [[TMP92:%.*]] = extractelement <2 x i32> [[TMP54]], i32 1
-; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[TMP92]], 15
+; CHECK-NEXT: [[TMP61:%.*]] = sub <2 x i32> [[TMP58]], [[TMP60]]
+; CHECK-NEXT: [[TMP62:%.*]] = insertelement <2 x i8> poison, i8 [[TMP42]], i32 0
+; CHECK-NEXT: [[TMP63:%.*]] = insertelement <2 x i8> [[TMP62]], i8 [[TMP41]], i32 1
+; CHECK-NEXT: [[TMP64:%.*]] = zext <2 x i8> [[TMP63]] to <2 x i32>
+; CHECK-NEXT: [[TMP65:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> <i64 7, i64 5>), i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> poison)
+; CHECK-NEXT: [[TMP66:%.*]] = zext <2 x i8> [[TMP65]] to <2 x i32>
+; CHECK-NEXT: [[TMP67:%.*]] = sub <2 x i32> [[TMP64]], [[TMP66]]
+; CHECK-NEXT: [[TMP68:%.*]] = shl <2 x i32> [[TMP67]], <i32 16, i32 16>
+; CHECK-NEXT: [[TMP69:%.*]] = add <2 x i32> [[TMP68]], [[TMP61]]
+; CHECK-NEXT: [[TMP70:%.*]] = add <2 x i32> [[TMP69]], [[TMP56]]
+; CHECK-NEXT: [[TMP71:%.*]] = sub <2 x i32> [[TMP56]], [[TMP69]]
+; CHECK-NEXT: [[TMP72:%.*]] = extractelement <2 x i32> [[TMP70]], i32 0
+; CHECK-NEXT: [[TMP73:%.*]] = extractelement <2 x i32> [[TMP70]], i32 1
+; CHECK-NEXT: [[ADD48_3:%.*]] = add i32 [[TMP72]], [[TMP73]]
+; CHECK-NEXT: [[ADD94:%.*]] = add i32 [[ADD48_3]], [[ADD48_2]]
+; CHECK-NEXT: [[SUB102:%.*]] = sub i32 [[ADD48_2]], [[ADD48_3]]
+; CHECK-NEXT: [[TMP74:%.*]] = extractelement <2 x i32> [[TMP45]], i32 1
+; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[TMP74]], 15
; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537
; CHECK-NEXT: [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535
-; CHECK-NEXT: [[TMP93:%.*]] = extractelement <2 x i32> [[TMP85]], i32 1
-; CHECK-NEXT: [[SHR_I49:%.*]] = lshr i32 [[TMP93]], 15
+; CHECK-NEXT: [[SHR_I49:%.*]] = lshr i32 [[ADD46_2]], 15
; CHECK-NEXT: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/83468
More information about the llvm-commits mailing list