[llvm] bbd1941 - [VPlan] Add disjoint flag to VPRecipeWithIRFlags. (#74364)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 5 07:22:03 PST 2023
Author: Florian Hahn
Date: 2023-12-05T15:21:59Z
New Revision: bbd1941a38fc84e7d371517db5b3dc695c31893c
URL: https://github.com/llvm/llvm-project/commit/bbd1941a38fc84e7d371517db5b3dc695c31893c
DIFF: https://github.com/llvm/llvm-project/commit/bbd1941a38fc84e7d371517db5b3dc695c31893c.diff
LOG: [VPlan] Add disjoint flag to VPRecipeWithIRFlags. (#74364)
A new disjoint flag was added for OR instructions in #72583.
Update VPRecipeWithIRFlags to also support the new flag. This
allows printing and preserving the disjoint flag in vectorized code.
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll
llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
llvm/test/Transforms/LoopVectorize/vplan-printing.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 265493eb105a0..9002452204480 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -826,6 +826,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
enum class OperationType : unsigned char {
Cmp,
OverflowingBinOp,
+ DisjointOp,
PossiblyExactOp,
GEPOp,
FPMathOp,
@@ -842,6 +843,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
};
private:
+ struct DisjointFlagsTy {
+ char IsDisjoint : 1;
+ };
struct ExactFlagsTy {
char IsExact : 1;
};
@@ -868,6 +872,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
union {
CmpInst::Predicate CmpPredicate;
WrapFlagsTy WrapFlags;
+ DisjointFlagsTy DisjointFlags;
ExactFlagsTy ExactFlags;
GEPFlagsTy GEPFlags;
NonNegFlagsTy NonNegFlags;
@@ -889,6 +894,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
if (auto *Op = dyn_cast<CmpInst>(&I)) {
OpType = OperationType::Cmp;
CmpPredicate = Op->getPredicate();
+ } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
+ OpType = OperationType::DisjointOp;
+ DisjointFlags.IsDisjoint = Op->isDisjoint();
} else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
OpType = OperationType::OverflowingBinOp;
WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
@@ -942,6 +950,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
WrapFlags.HasNUW = false;
WrapFlags.HasNSW = false;
break;
+ case OperationType::DisjointOp:
+ DisjointFlags.IsDisjoint = false;
+ break;
case OperationType::PossiblyExactOp:
ExactFlags.IsExact = false;
break;
@@ -968,6 +979,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
I->setHasNoSignedWrap(WrapFlags.HasNSW);
break;
+ case OperationType::DisjointOp:
+ cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
+ break;
case OperationType::PossiblyExactOp:
I->setIsExact(ExactFlags.IsExact);
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 462cd201b6fcc..1891b211a3566 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -635,6 +635,10 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
case OperationType::Cmp:
O << " " << CmpInst::getPredicateName(getPredicate());
break;
+ case OperationType::DisjointOp:
+ if (DisjointFlags.IsDisjoint)
+ O << " disjoint";
+ break;
case OperationType::PossiblyExactOp:
if (ExactFlags.IsExact)
O << " exact";
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index b09960f29f00b..0b5058cff8d5c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -122,7 +122,7 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, <vscale x 4 x i64> [[VEC_IND]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP4]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison)
-; CHECK-NEXT: [[TMP5:%.*]] = or <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, <vscale x 4 x i64> [[TMP5]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison)
; CHECK-NEXT: [[TMP7:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i32>
@@ -217,7 +217,7 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 {
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[TMP7:%.*]] = or <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP7:%.*]] = or disjoint <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 4 x i32> [[TMP5]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP9:%.*]] = trunc <vscale x 4 x i32> [[TMP8]] to <vscale x 4 x i16>
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, <vscale x 4 x i64> [[VEC_IND]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index d82d9a2d10cd8..bf2b9e2aef85a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -24,7 +24,7 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP12]], i64 0
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x ptr> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i64> [[VEC_IND3]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or disjoint <16 x i64> [[VEC_IND3]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT: [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP15]], i64 0
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x ptr> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll b/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll
index cbe22fe8a905d..66509ffedd68b 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll
@@ -18,7 +18,7 @@ define void @generate_disjoint_flags(i64 %n, ptr noalias %x) {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[WIDE_LOAD]], <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP2]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
index 3d951da9feb27..2a2d55fb75c53 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
@@ -195,7 +195,7 @@ for.end:
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 2
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP11]], i32 3
; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP17:%.*]] = or <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT: [[TMP17:%.*]] = or disjoint <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP17]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 5a7c3dcc8b686..f129cf3e2a1aa 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -773,7 +773,7 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) {
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%lv> = load ir<%gep.x>
-; CHECK-NEXT: WIDEN ir<%or.1> = or ir<%lv>, ir<1>
+; CHECK-NEXT: WIDEN ir<%or.1> = or disjoint ir<%lv>, ir<1>
; CHECK-NEXT: WIDEN ir<%or.2> = or ir<%lv>, ir<3>
; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%or.1>, ir<%or.2>
; CHECK-NEXT: WIDEN store ir<%gep.x>, ir<%add>
More information about the llvm-commits
mailing list