[llvm] [VPlan] Refine VPVectorPointer printing and return false for mayHaveSideEffects (PR #113410)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 22 20:06:56 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Shih-Po Hung (arcbbb)
<details>
<summary>Changes</summary>
Following #<!-- -->110974, update VPVectorPointerRecipe as well.
---
Patch is 102.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113410.diff
23 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+10-3)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+55-9)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+4-3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+56-28)
- (modified) llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp (+2-2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (+1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll (+58-72)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll (+2-3)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+12-8)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll (+56-64)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-model.ll (+57-61)
- (modified) llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/dead_instructions.ll (+5-8)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/reverse_induction.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/use-iv-start-value.ll (-6)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+21-21)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+3-2)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6e082b1c134dee..220625908dff9e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4442,6 +4442,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPInstructionSC:
case VPDef::VPCanonicalIVPHISC:
case VPDef::VPVectorPointerSC:
+ case VPDef::VPReverseVectorPointerSC:
case VPDef::VPExpandSCEVSC:
case VPDef::VPEVLBasedIVPHISC:
case VPDef::VPPredInstPHISC:
@@ -8160,9 +8161,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
if (Consecutive) {
auto *GEP = dyn_cast<GetElementPtrInst>(
Ptr->getUnderlyingValue()->stripPointerCasts());
- auto *VectorPtr = new VPVectorPointerRecipe(
- Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
- I->getDebugLoc());
+ VPSingleDefRecipe *VectorPtr;
+ if (Reverse)
+ VectorPtr = new VPReverseVectorPointerRecipe(
+ Ptr, &Plan.getVF(), getLoadStoreType(I),
+ GEP ? GEP->isInBounds() : false, I->getDebugLoc());
+ else
+ VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
+ GEP ? GEP->isInBounds() : false,
+ I->getDebugLoc());
Builder.getInsertBlock()->appendRecipe(VectorPtr);
Ptr = VectorPtr;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 594492344d43ce..8df63b5796c739 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -882,6 +882,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPReplicateSC:
case VPRecipeBase::VPScalarIVStepsSC:
case VPRecipeBase::VPVectorPointerSC:
+ case VPRecipeBase::VPReverseVectorPointerSC:
case VPRecipeBase::VPWidenCallSC:
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
@@ -1078,6 +1079,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
+ R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
}
@@ -1785,20 +1787,64 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
#endif
};
-/// A recipe to compute the pointers for widened memory accesses of IndexTy for
-/// all parts. If IsReverse is true, compute pointers for accessing the input in
-/// reverse order per part.
+/// A recipe to compute the pointers for widened memory accesses of IndexTy
+/// in reverse order.
+class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
+ public VPUnrollPartAccessor<2> {
+ Type *IndexedTy;
+
+public:
+ VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
+ bool IsInBounds, DebugLoc DL)
+ : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
+ ArrayRef<VPValue *>({Ptr, VF}),
+ GEPFlagsTy(IsInBounds), DL),
+ IndexedTy(IndexedTy) {}
+
+ VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
+
+ VPValue *getVFValue() { return getOperand(1); }
+ const VPValue *getVFValue() const { return getOperand(1); }
+
+ void execute(VPTransformState &State) override;
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+ /// Returns true if the recipe only uses the first part of operand \p Op.
+ bool onlyFirstPartUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ assert(getNumOperands() <= 2 && "must have at most two operands");
+ return true;
+ }
+
+ VPReverseVectorPointerRecipe *clone() override {
+ return new VPReverseVectorPointerRecipe(
+ getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
+/// A recipe to compute the pointers for widened memory accesses of IndexTy.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
public VPUnrollPartAccessor<1> {
Type *IndexedTy;
- bool IsReverse;
public:
- VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
- bool IsInBounds, DebugLoc DL)
+ VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
+ DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
GEPFlagsTy(IsInBounds), DL),
- IndexedTy(IndexedTy), IsReverse(IsReverse) {}
+ IndexedTy(IndexedTy) {}
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
@@ -1819,8 +1865,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
}
VPVectorPointerRecipe *clone() override {
- return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
- isInBounds(), getDebugLoc());
+ return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
+ getDebugLoc());
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 277df0637372d8..7bfbcc3dff0e1f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -261,9 +261,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
- VPWidenCanonicalIVRecipe>([this](const VPRecipeBase *R) {
- return inferScalarType(R->getOperand(0));
- })
+ VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>(
+ [this](const VPRecipeBase *R) {
+ return inferScalarType(R->getOperand(0));
+ })
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
VPWidenSelectRecipe>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0d092b9c10acc8..1b715a28604e33 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -154,6 +154,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPDerivedIVSC:
case VPPredInstPHISC:
case VPScalarCastSC:
+ case VPVectorPointerSC:
+ case VPReverseVectorPointerSC:
return false;
case VPInstructionSC:
return mayWriteToMemory();
@@ -1813,38 +1815,63 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
-void VPVectorPointerRecipe ::execute(VPTransformState &State) {
- auto &Builder = State.Builder;
- State.setDebugLocFrom(getDebugLoc());
- unsigned CurrentPart = getUnrollPart(*this);
+static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
+ unsigned CurrentPart, IRBuilderBase &Builder) {
// Use i32 for the gep index type when the value is constant,
// or query DataLayout for a more suitable index type otherwise.
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
- Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
- ? DL.getIndexType(Builder.getPtrTy(0))
- : Builder.getInt32Ty();
+ return IsScalable && (IsReverse || CurrentPart > 0)
+ ? DL.getIndexType(Builder.getPtrTy(0))
+ : Builder.getInt32Ty();
+}
+
+void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
+ auto &Builder = State.Builder;
+ State.setDebugLocFrom(getDebugLoc());
+ unsigned CurrentPart = getUnrollPart(*this);
+ Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
+ CurrentPart, Builder);
+
+ // The wide store needs to start at the last vector element.
+ Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
+ if (IndexTy != RunTimeVF->getType())
+ RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
+ // NumElt = -CurrentPart * RunTimeVF
+ Value *NumElt = Builder.CreateMul(
+ ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
+ // LastLane = 1 - RunTimeVF
+ Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
Value *Ptr = State.get(getOperand(0), VPLane(0));
bool InBounds = isInBounds();
+ Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
+ ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
- Value *ResultPtr = nullptr;
- if (IsReverse) {
- // If the address is consecutive but reversed, then the
- // wide store needs to start at the last vector element.
- // RunTimeVF = VScale * VF.getKnownMinValue()
- // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
- Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
- // NumElt = -CurrentPart * RunTimeVF
- Value *NumElt = Builder.CreateMul(
- ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
- // LastLane = 1 - RunTimeVF
- Value *LastLane =
- Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
- ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
- ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
- } else {
- Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
- ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
- }
+ State.set(this, ResultPtr, /*IsScalar*/ true);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent;
+ printAsOperand(O, SlotTracker);
+ O << " = reverse-vector-pointer ";
+ if (isInBounds())
+ O << "inbounds ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
+void VPVectorPointerRecipe::execute(VPTransformState &State) {
+ auto &Builder = State.Builder;
+ State.setDebugLocFrom(getDebugLoc());
+ unsigned CurrentPart = getUnrollPart(*this);
+ Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
+ CurrentPart, Builder);
+ Value *Ptr = State.get(getOperand(0), VPLane(0));
+ bool InBounds = isInBounds();
+
+ Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
+ Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
State.set(this, ResultPtr, /*IsScalar*/ true);
}
@@ -1855,8 +1882,9 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
O << Indent;
printAsOperand(O, SlotTracker);
O << " = vector-pointer ";
- if (IsReverse)
- O << "(reverse) ";
+
+ if (isInBounds())
+ O << "inbounds ";
printOperands(O, SlotTracker);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index ca78f32506ef71..1e32865e8ee576 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
// Add operand indicating the part to generate code for, to recipes still
// requiring it.
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
- VPVectorPointerRecipe>(Copy) ||
+ VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
m_VPValue())))
Copy->addOperand(getConstantVPV(Part));
- if (isa<VPVectorPointerRecipe>(R))
+ if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
Copy->setOperand(0, R.getOperand(0));
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 4c383244f96f1a..d7626e437db338 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -346,6 +346,7 @@ class VPDef {
VPScalarCastSC,
VPScalarIVStepsSC,
VPVectorPointerSC,
+ VPReverseVectorPointerSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
index 81121019efe767..76562e80fbc4a1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -24,43 +24,36 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP4]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[INDEX]], -1
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[N]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 3
-; CHECK-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 3
-; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP14]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP15]]
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i64 [[TMP16]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x double>, ptr [[TMP18]], align 8
-; CHECK-NEXT: [[TMP19:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP20:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP23:%.*]] = shl i64 [[TMP22]], 3
-; CHECK-NEXT: [[TMP24:%.*]] = sub i64 1, [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP26]], 3
-; CHECK-NEXT: [[TMP28:%.*]] = sub i64 0, [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = sub i64 1, [[TMP27]]
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP28]]
-; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP29]]
-; CHECK-NEXT: store <vscale x 8 x double> [[TMP19]], ptr [[TMP25]], align 8
-; CHECK-NEXT: store <vscale x 8 x double> [[TMP20]], ptr [[TMP31]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[INDEX]], -1
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[N]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = sub i64 0, [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[TMP13]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP11]], align 8
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x double>, ptr [[TMP15]], align 8
+; CHECK-NEXT: [[TMP16:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP17:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = sub i64 0, [[TMP5]]
+; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[TMP22]]
+; CHECK-NEXT: store <vscale x 8 x double> [[TMP16]], ptr [[TMP20]], align 8
+; CHECK-NEXT: store <vscale x 8 x double> [[TMP17]], ptr [[TMP24]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -75,8 +68,8 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
; CHECK-NEXT: [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[I_08]] = add nsw i64 [[I_08_IN]], -1
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]]
-; CHECK-NEXT: [[TMP33:%.*]] = load double, ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP33]], 1.000000e+00
+; CHECK-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP26]], 1.000000e+00
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]]
; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX1]], align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1
@@ -126,43 +119,36 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP7:%.*]] = cal...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/113410
More information about the llvm-commits
mailing list