[llvm] [WIP][LoopIdiomVectorize] Recognize and transform minidx pattern (PR #144987)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 19 23:22:40 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Madhur Amilkanthwar (madhur13490)
<details>
<summary>Changes</summary>
This patch vectorizes the case where the array scan happens backwards and first minidx is returned. Motivating example is found in rnflow FORTRAN benchmark.
Pre-commit test can be found as part of #<!-- -->141556
---
Patch is 62.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144987.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp (+714)
- (added) llvm/test/Transforms/LoopVectorize/last-min-index-ftn.ll (+291)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
index 491f0b76f4ae0..afb6f6aea4d59 100644
--- a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
@@ -70,10 +70,12 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <cstdint>
using namespace llvm;
using namespace PatternMatch;
@@ -99,6 +101,11 @@ static cl::opt<bool>
cl::desc("Proceed with Loop Idiom Vectorize Pass, but do "
"not convert byte-compare loop(s)."));
+static cl::opt<bool> DisableMinMaxlocPattern(
+ "disable-loop-idiom-vectorize-minmaxloc", cl::Hidden, cl::init(false),
+ cl::desc("Proceed with Loop Idiom Vectorize Pass, but do "
+ "not convert minloc/maxloc loop(s)."));
+
static cl::opt<unsigned>
ByteCmpVF("loop-idiom-vectorize-bytecmp-vf", cl::Hidden,
cl::desc("The vectorization factor for byte-compare patterns."),
@@ -149,6 +156,13 @@ class LoopIdiomVectorize {
bool recognizeByteCompare();
+ bool recognizeMinIdxPattern();
+
+ bool transformMinIdxPattern(unsigned VF, Value *FirstIndex,
+ Value *SecondIndex, BasicBlock *LoopPreheader,
+ Value *BasePtr, BasicBlock *Header,
+ BasicBlock *ExitBB, Type *LoadType);
+
Value *expandFindMismatch(IRBuilder<> &Builder, DomTreeUpdater &DTU,
GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
Instruction *Index, Value *Start, Value *MaxLen);
@@ -239,9 +253,709 @@ bool LoopIdiomVectorize::run(Loop *L) {
if (recognizeFindFirstByte())
return true;
+ if (recognizeMinIdxPattern())
+ return true;
+
return false;
}
+bool LoopIdiomVectorize::recognizeMinIdxPattern() {
+ BasicBlock *Header = CurLoop->getHeader();
+ Function *F = Header->getParent();
+ BasicBlock *LoopPreheader = CurLoop->getLoopPreheader();
+
+ if (!TTI->supportsScalableVectors() || DisableMinMaxlocPattern) {
+ LLVM_DEBUG(dbgs() << "Does not meet pre-requisites for minidx idiom\n");
+ return false;
+ }
+
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) {
+ LLVM_DEBUG(dbgs() << "Loop does not match the required form: "
+ "expected exactly 1 backedge and 1 block\n");
+ return false;
+ }
+
+ if (Header->sizeWithoutDebug() < 14) {
+ LLVM_DEBUG(dbgs() << "Header block is too small for minloc pattern\n");
+ return false;
+ }
+
+ // We need the below things to be able to transform the pattern:
+ // 1. First index. For this we look at the terminator instruction of
+ // the predecessor of the loop preheader. The condition of the terminator
+ // instruction decides whether to jump to scalar loop.
+ // 2. Second index.
+ // 3. Base pointer.
+ // For 2 and 3, we iterate backward from the header block to find the select
+ // instruction. The select instruction should be of the form select (fcmp
+ // contract olt loadA, loadB). Further details below. Once we find the
+ // required pattern, we can extract the base pointer from the first load
+ // instruction
+ // 4. Exit basic block. For this we look at the terminator instruction of the
+ // header block.
+
+ // Extract the first index from the preheader.
+ // Example LLVM IR to inspect:
+ // %4 = load i32, ptr %1, align 4
+ // %5 = load i32, ptr %0, align 4
+ // %6 = sext i32 %5 to i64
+ // %7 = sub i32 0, %4
+ // %8 = sext i32 %7 to i64
+ // %9 = add nsw i64 %8, %6
+ // %10 = sub nsw i64 0, %9
+ // %invariant.gep = ...
+ // %invariant.gep1 = ...
+ // %11 = icmp slt i64 %9, 0
+ // br i1 %11, label %.loop_preheader, ...
+ Value *ICmpSLTFirstVal = nullptr, *FirstIndex = nullptr;
+ BasicBlock *LoopPreheaderBB = nullptr, *RetBB = nullptr;
+ BasicBlock *PreheaderPred = LoopPreheader->getSinglePredecessor();
+ if (!match(PreheaderPred->getTerminator(),
+ m_Br(m_SpecificICmp(ICmpInst::ICMP_SLT, m_Value(ICmpSLTFirstVal),
+ m_ZeroInt()),
+ m_BasicBlock(LoopPreheaderBB), m_BasicBlock(RetBB)))) {
+ LLVM_DEBUG(dbgs() << "Terminator doesn't match expected pattern\n");
+ return false;
+ }
+
+ // The Add operand can be either below:
+ // 1. add(sext(sub(0 - ipos2)), sext(ipos1))
+ // 2. add(sext(ipos1), sext(sub(0 - ipos2)))
+ // This depends on whether canonicalization has been done or not.
+ if (match(ICmpSLTFirstVal, m_Add(m_SExt(m_Sub(m_ZeroInt(), m_Value())),
+ (m_SExt(m_Value()))))) {
+ FirstIndex = dyn_cast<Instruction>(ICmpSLTFirstVal)->getOperand(1);
+ } else if (match(ICmpSLTFirstVal,
+ m_Add(m_SExt(m_Value()),
+ m_SExt(m_Sub(m_ZeroInt(), m_Value()))))) {
+ FirstIndex = dyn_cast<Instruction>(ICmpSLTFirstVal)->getOperand(0);
+ } else {
+ LLVM_DEBUG(dbgs() << "Cannot extract FirstIndex from ICmpSLTFirstVal\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "FirstIndex is " << *FirstIndex << "\n");
+
+ BasicBlock::reverse_iterator RI = Header->rbegin();
+ SelectInst *SelectToInspect = nullptr;
+ Value *BasePtr = nullptr;
+ Instruction *Trunc = nullptr;
+
+ // Iterate in backward direction to extract the select instruction which
+ // matches the pattern:
+
+ // %load1_gep = getelementptr float, ptr %invariant.gep, i64 %indvars.iv
+ // %load1 = load float, ptr %load1_gep, align 4
+ // %load2_gep = getelementptr float, ptr ..., ...
+ // %load2 = load float, ptr %load2_gep, align 4
+ // %trunc = trunc nsw i64 %indvars.iv.next to i32
+ // %fcmp = fcmp contract olt float %load1, %load2
+ // %select = select i1 %fcmp, i32 %trunc, i32 <phi>
+ // %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ while (RI != Header->rend()) {
+ if (auto *Sel = dyn_cast<SelectInst>(&*RI)) {
+ if (match(Sel, m_Select(m_SpecificFCmp(
+ FCmpInst::FCMP_OLT,
+ m_Load(m_GEP(m_Value(BasePtr), m_Value())),
+ m_Load(m_GEP(m_Value(), m_Value()))),
+ m_Instruction(Trunc), m_Value()))) {
+ SelectToInspect = Sel;
+ }
+ }
+ ++RI;
+ }
+ if (!SelectToInspect || !BasePtr) {
+ LLVM_DEBUG(dbgs() << "Select or BasePtr not found\n");
+ return false;
+ }
+
+ // Extract FCmp and validate load types
+ auto *FCmp = dyn_cast<FCmpInst>(SelectToInspect->getCondition());
+ if (!FCmp || !isa<LoadInst>(FCmp->getOperand(0)) ||
+ !isa<LoadInst>(FCmp->getOperand(1)))
+ return false;
+
+ auto *LoadA = cast<LoadInst>(FCmp->getOperand(0));
+ auto *LoadB = cast<LoadInst>(FCmp->getOperand(1));
+
+ if (LoadA->getType() != LoadB->getType()) {
+ LLVM_DEBUG(dbgs() << "Load types don't match\n");
+ return false;
+ }
+
+ // Validate truncation instruction matches expected pattern
+ TruncInst *TInst = dyn_cast<TruncInst>(Trunc);
+ if (!TInst || TInst->getDestTy() != F->getReturnType()) {
+ LLVM_DEBUG(dbgs() << "Trunc instruction validation failed\n");
+ return false;
+ }
+ // Trunc instruction's operand should be of the form (add IVPHI, -1).
+ Instruction *IVInst = nullptr;
+ if (!match(TInst->getOperand(0),
+ m_Add(m_Instruction(IVInst), m_SpecificInt(-1)))) {
+ LLVM_DEBUG(
+ dbgs() << "Trunc instruction operand doesn't match expected pattern\n");
+ return false;
+ }
+
+ PHINode *IVPhi = dyn_cast<PHINode>(IVInst);
+ if (!IVPhi) {
+ LLVM_DEBUG(dbgs() << "Add operand of trunc instruction is not a PHINode\n");
+ return false;
+ }
+
+ Value *SecondIndex = IVPhi->getIncomingValueForBlock(LoopPreheader);
+ LLVM_DEBUG(dbgs() << "SecondIndex is " << *SecondIndex << "\n");
+
+ // 4. Inspect Terminator to extract the exit block.
+ // Example LLVM IR to inspect:
+ // %20 = icmp sgt i64 %13, 1
+ // br i1 %20, label %.lr.ph, label %._crit_edge.loopexit
+ Value *ICmpFirstVal = nullptr;
+ BasicBlock *FalseBB = nullptr;
+ BranchInst *Terminator = dyn_cast<BranchInst>(Header->getTerminator());
+ if (!match(Terminator, m_Br(m_SpecificICmp(ICmpInst::ICMP_SGT,
+ m_Value(ICmpFirstVal), m_One()),
+ m_BasicBlock(Header), m_BasicBlock(FalseBB)))) {
+ LLVM_DEBUG(dbgs() << "Terminator doesn't match expected pattern\n");
+ return false;
+ }
+
+ unsigned VF = 128 / LoadA->getType()->getPrimitiveSizeInBits();
+
+ // We've recognized the pattern, now transform it.
+ LLVM_DEBUG(dbgs() << "FOUND MINIDX PATTERN\n");
+
+ return transformMinIdxPattern(VF, FirstIndex, SecondIndex, LoopPreheader,
+ BasePtr, Header, FalseBB, LoadA->getType());
+}
+
+bool LoopIdiomVectorize::transformMinIdxPattern(
+ unsigned VF, Value *FirstIndex, Value *SecondIndex,
+ BasicBlock *LoopPreheader, Value *BasePtr, BasicBlock *Header,
+ BasicBlock *ExitBB, Type *LoadType) {
+
+ LLVMContext &Ctx = Header->getContext();
+ Function *F = Header->getParent();
+ Module *M = F->getParent();
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ Type *I32Ty = Type::getInt32Ty(Ctx);
+ Type *I64Ty = Type::getInt64Ty(Ctx);
+ Type *I1Ty = Type::getInt1Ty(Ctx);
+ Type *PointerType = PointerType::get(Ctx, 0);
+ auto *MaskTy = ScalableVectorType::get(Type::getInt1Ty(Ctx), 4);
+ auto *VecTy = ScalableVectorType::get(
+ LoadType, VF); // This is the vector type for i32 values
+
+ BasicBlock *VecEntry = BasicBlock::Create(Ctx, "minidx.vec.entry", F);
+ BasicBlock *MinIdxPartial1If =
+ BasicBlock::Create(Ctx, "minidx.partial.1.if", F);
+ BasicBlock *MinIdxPartial1ProcExit =
+ BasicBlock::Create(Ctx, "minidx.partial.1.proc.exit", F);
+ BasicBlock *MinIdxWhileBodyLrPh =
+ BasicBlock::Create(Ctx, "minidx.while.body.ph", F);
+ BasicBlock *MinIdxVectBody = BasicBlock::Create(Ctx, "minidx.vect.body", F);
+ BasicBlock *MinIdxVectUpdate =
+ BasicBlock::Create(Ctx, "minidx.vect.update", F);
+ BasicBlock *MinIdxVectContinue =
+ BasicBlock::Create(Ctx, "minidx.vect.continue", F);
+ BasicBlock *MinIdxVectEnd = BasicBlock::Create(Ctx, "minidx.vect.end", F);
+ BasicBlock *MinIdxPartial2If =
+ BasicBlock::Create(Ctx, "minidx.partial.2.if", F);
+ BasicBlock *MinIdxPartial2Exit =
+ BasicBlock::Create(Ctx, "minidx.partial.2.exit", F);
+ BasicBlock *MinIdxEnd = BasicBlock::Create(Ctx, "minidx.end", F);
+
+ Loop *VecLoop = LI->AllocateLoop();
+ VecLoop->addBasicBlockToLoop(MinIdxVectBody, *LI);
+ VecLoop->addBasicBlockToLoop(MinIdxVectUpdate, *LI);
+ VecLoop->addBasicBlockToLoop(MinIdxVectContinue, *LI);
+
+ LI->addTopLevelLoop(VecLoop);
+
+ // Start populating preheader.
+ IRBuilder<> Builder(LoopPreheader->getTerminator());
+ // %VScale = tail call i64 @llvm.vscale.i64()
+ // %VLen = shl nuw nsw i64 %VScale, 2
+ // %minidx.not = sub nsw i64 0, %VLen
+ // %minidx.and = and i64 %ipos2, %minidx.not
+ Value *GMax = Builder.CreateVectorSplat(ElementCount::getScalable(VF),
+ ConstantFP::getInfinity(LoadType, 0),
+ "minloc.gmax");
+ Value *VScale = Builder.CreateVScale(I64Ty);
+ Value *VLen =
+ Builder.CreateShl(VScale, ConstantInt::get(I64Ty, 2), "minidx.vlen");
+ Value *Not =
+ Builder.CreateSub(ConstantInt::get(I64Ty, 0), VLen, "minidx.not");
+ // Value *Ipos2Minus1 = Builder.CreateSub(IncomingPos2,
+ // ConstantInt::get(I64Ty, 1), "minidx.ipos2.minus1");
+ Value *And = Builder.CreateAnd(SecondIndex, Not, "minidx.and");
+
+ // %minidx.umax = tail call i64 @llvm.smax.i64(i64 %minidx.and, i64 %ipos1)
+ // %minidx.add = add i64 %ipos2, 1
+ Value *Umax = Builder.CreateIntrinsic(
+ Intrinsic::smax, {I64Ty}, {And, FirstIndex}, nullptr, "minidx.umax");
+ Value *Add =
+ Builder.CreateAdd(SecondIndex, ConstantInt::get(I64Ty, 1), "minidx.add");
+ // %minidx.mask = call <vscale x 4 x i1>
+ // @llvm.get.active.lane.mask.nxv4i1.i64(i64 %minidx.umax, i64 %minidx.add)
+ Value *MinlocMask = Builder.CreateCall(
+ Intrinsic::getOrInsertDeclaration(M, Intrinsic::get_active_lane_mask,
+ {MaskTy, I64Ty}),
+ {Umax, Add}, "minidx.mask");
+
+ // %minidx.add.ptr.i = getelementptr inbounds nuw float, ptr %p, i64
+ // %minidx.umax %minidx.masked.load = tail call <vscale x 4 x float>
+ // @llvm.masked.load.nxv4f32.p0(ptr %minidx.add.ptr.i, i32 1, <vscale x 4 x
+ // i1> %minidx.mask, <vscale x 4 x float> zeroinitializer) %minidx.currentVals
+ // = select <vscale x 4 x i1> %minidx.mask, <vscale x 4 x float>
+ // %minidx.masked.load, <vscale x 4 x float> splat (float 0x7FF0000000000000)
+ // %minidx.reverse = tail call <vscale x 4 x i1>
+ // @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %minidx.mask)
+ // %minidx.reverseVals = tail call <vscale x 4 x float>
+ // @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %minidx.currentVals)
+ // %minidx.minVal = call float @llvm.vector.reduce.fminimum.nxv4f32(<vscale x
+ // 4 x float> %minidx.reverseVals)
+
+ Value *UmaxMinus1 =
+ Builder.CreateSub(Umax, ConstantInt::get(I64Ty, 1), "minidx.umax.minus1");
+ Value *AddPtrI = Builder.CreateInBoundsGEP(LoadType, BasePtr, UmaxMinus1,
+ "minidx.add.ptr.i");
+
+ Value *LoadVals =
+ Builder.CreateCall(Intrinsic::getOrInsertDeclaration(
+ M, Intrinsic::masked_load, {VecTy, PointerType}),
+ {AddPtrI, ConstantInt::get(I32Ty, 1), MinlocMask,
+ Constant::getNullValue(VecTy)},
+ "minidx.loadVals");
+ Value *CurrentVals =
+ Builder.CreateSelect(MinlocMask, LoadVals, GMax, "minidx.currentVals");
+ Value *Reverse = Builder.CreateCall(
+ Intrinsic::getOrInsertDeclaration(M, Intrinsic::vector_reverse, {MaskTy}),
+ {MinlocMask}, "minidx.reverse");
+ Value *ReverseVals = Builder.CreateCall(
+ Intrinsic::getOrInsertDeclaration(M, Intrinsic::vector_reverse, {VecTy}),
+ {CurrentVals}, "minidx.reverseVals");
+ Value *MinVal =
+ Builder.CreateCall(Intrinsic::getOrInsertDeclaration(
+ M, Intrinsic::vector_reduce_fminimum, {VecTy}),
+ {ReverseVals}, "minidx.minVal");
+
+ Builder.CreateCondBr(Builder.getTrue(), VecEntry, Header);
+ LoopPreheader->getTerminator()->eraseFromParent();
+
+ // Add edge from preheader to VecEntry
+ DTU.applyUpdates({{DominatorTree::Insert, LoopPreheader, VecEntry}});
+
+ // %minidx.entry.cmp = fcmp olt float %minidx.minVal, %init
+ // br i1 %minidx.entry.cmp, label %minidx.partial.1.if, label
+ // %minidx.partial.1.proc.exit
+ Builder.SetInsertPoint(VecEntry);
+ Value *VecEntryCmp = Builder.CreateFCmpOLT(
+ MinVal, ConstantFP::getInfinity(LoadType, 0), "minidx.entry.cmp");
+ Builder.CreateCondBr(VecEntryCmp, MinIdxPartial1If, MinIdxPartial1ProcExit);
+
+ // Connect edges from VecEntry to MinIdxPartial1If and MinIdxPartial1ProcExit
+ DTU.applyUpdates({{DominatorTree::Insert, VecEntry, MinIdxPartial1If},
+ {DominatorTree::Insert, VecEntry, MinIdxPartial1ProcExit}});
+
+ Builder.SetInsertPoint(MinIdxPartial1If);
+ // %minVal.splatinsert = insertelement <vscale x 4 x float> poison, float
+ // %minidx.minVal, i64 0 %minVal.splat = shufflevector <vscale x 4 x float>
+ // %minVal.splatinsert, <vscale x 4 x float> poison, <vscale x 4 x i32>
+ // zeroinitializer
+ Value *MinValSplat = Builder.CreateVectorSplat(ElementCount::getScalable(VF),
+ MinVal, "minval.splat");
+ // %minidx.partial.1.cmp = fcmp oeq <vscale x 4 x float> %minidx.reverseVals,
+ // %minVal.splat %minidx.partial.1.and = and <vscale x 4 x i1>
+ // %minidx.reverse, %minidx.partial.1.cmp %minidx.partial.1.cttz = tail call
+ // i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1>
+ // %minidx.partial.1.and, i1 true)
+ Value *FirstPartialCmp =
+ Builder.CreateFCmpOEQ(ReverseVals, MinValSplat, "minidx.partial.1.cmp");
+ Value *FirstPartialAnd =
+ Builder.CreateAnd(Reverse, FirstPartialCmp, "minidx.partial.1.and");
+ Value *FirstPartialCTTZ = Builder.CreateCountTrailingZeroElems(
+ I64Ty, FirstPartialAnd, ConstantInt::get(I1Ty, 1),
+ "minidx.partial.1.cttz");
+
+ // FIXME this pattern
+ // %minidx.partial.1.xor = xor i64 %minidx.partial.1.cttz, -1
+ // %minidx.partial.1.add1 = add i64 %minidx.umax, %VLen
+ // %minidx.partial.1.add2 = add i64 %minidx.partial.1.add1,
+ // %minidx.partial.1.xor br label %minidx.partial.1.proc.exit
+ Value *FirstPartialTmp1 =
+ Builder.CreateSub(VLen, FirstPartialCTTZ, "minidx.partial.1.tmp");
+ Value *FirstPartialTmp =
+ Builder.CreateSub(FirstPartialTmp1, ConstantInt::get(I64Ty, 1),
+ "minidx.partial.1.tmp.minus1");
+ Value *FirstPartialAdd2 =
+ Builder.CreateAdd(Umax, FirstPartialTmp, "minidx.partial.1.add2");
+
+ Builder.CreateBr(MinIdxPartial1ProcExit);
+
+ DTU.applyUpdates(
+ {{DominatorTree::Insert, MinIdxPartial1If, MinIdxPartial1ProcExit}});
+
+ Builder.SetInsertPoint(MinIdxPartial1ProcExit);
+ // %minidx.partial.1.exit.known_min = phi float [ %minidx.minVal,
+ // %minidx.partial.1.if ], [ %init, %entry ] %partial1.exit.known_arg = phi
+ // i64 [ %minidx.partial.1.add2, %minidx.partial.1.if ], [ 0, %entry ]
+ PHINode *Partial1ExitKnownMin =
+ Builder.CreatePHI(LoadType, 2, "minidx.partial.1.exit.known_min");
+ PHINode *Partial1ExitKnownArg =
+ Builder.CreatePHI(I64Ty, 2, "partial1.exit.known_arg");
+
+ Partial1ExitKnownMin->addIncoming(MinVal, MinIdxPartial1If);
+ Partial1ExitKnownMin->addIncoming(ConstantFP::getInfinity(LoadType, 0),
+ VecEntry);
+ Partial1ExitKnownArg->addIncoming(FirstPartialAdd2, MinIdxPartial1If);
+ Partial1ExitKnownArg->addIncoming(ConstantInt::get(I64Ty, 0), VecEntry);
+
+ // %minidx.partial.1.proc.exit.add = add i64 %VLen, %ipos1
+ // %minidx.partial.1.proc.exit.icmp = icmp ult i64 %minidx.umax,
+ // %minidx.partial.1.proc.exit.add br i1 %minidx.partial.1.proc.exit.icmp,
+ // label %minidx.vect.end, label %minidx.while.body.ph
+ Value *MinIdxPartial1ProcExitAdd =
+ Builder.CreateAdd(VLen, FirstIndex, "minidx.partial.1.proc.exit.add");
+ Value *MinIdxPartial1ProcExitICmp = Builder.CreateICmpULT(
+ Umax, MinIdxPartial1ProcExitAdd, "minidx.partial.1.proc.exit.icmp");
+ Builder.CreateCondBr(MinIdxPartial1ProcExitICmp, MinIdxVectEnd,
+ MinIdxWhileBodyLrPh);
+
+ DTU.applyUpdates(
+ {{DominatorTree::Insert, MinIdxPartial1ProcExit, MinIdxVectEnd},
+ {DominatorTree::Insert, MinIdxPartial1ProcExit, MinIdxWhileBodyLrPh}});
+
+ Builder.SetInsertPoint(MinIdxWhileBodyLrPh);
+ // %minidx.while.body.ph.mul = mul nsw i64 %VScale, -16
+ // %minidx.while.body.ph.gep = getelementptr i8, ptr %p, i64
+ // %minidx.while.body.ph.mul br label %minidx.vect.body
+ Builder.CreateBr(MinIdxVectBody);
+
+ DTU.applyUpdates(
+ {{DominatorTree::Insert, MinIdxWhileBodyLrPh, MinIdxVectBody}});
+
+ Builder.SetInsertPoint(MinIdxVectBody);
+ // %minidx.vect.body.phi1 = phi i64 [ %minidx.umax, %minidx.while.body.ph ], [
+ // %minidx.vect.body.sub, %minidx.vect.continue ] %minidx.vect.body.known_arg
+ // = phi i64 [ %partial1.exit.known_arg, %minidx.while.body.ph ], [
+ // %minidx.vect.continue.known_arg, %minidx.vect.continue ]
+ // %minidx.vect.body.known_min = phi float [ %minidx.partial.1.exit.known_min,
+ // %minidx.while.body.ph ], [ %minidx.vect.continue.known_min,
+ // %minidx.vect.continue ]
+ PHINode *MinIdxVectBodyPhi1 =
+ Builder.CreatePHI(I64Ty, 2, "minidx.vect.body.phi1");
+ PHINode *MinIdxVectBodyKnownArg =
+ Builder.CreatePHI(I64Ty, 2, "minidx.vect.body.known_arg");
+ PHINode *MinIdxVectBodyKnownMin =
+ Builder.CreatePHI(LoadType, 2, "minidx.vect.body.known_min");
+
+ // %minidx.vect.body.sub = sub i64 %minidx.vect.body.phi1, %VLen
+ // %minidx.vect.body.shl = shl i64 %minidx.vect.body.phi1, 2
+ // %minid...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/144987
More information about the llvm-commits
mailing list