[llvm] [GVN] Support rnflow pattern matching and transform (PR #162259)
Madhur Amilkanthwar via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 10 02:12:19 PST 2025
https://github.com/madhur13490 updated https://github.com/llvm/llvm-project/pull/162259
>From 8826265b637eed80a52b375a961add9d9f1d7919 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Fri, 15 Aug 2025 00:34:49 -0700
Subject: [PATCH 1/5] [GVN] Support rnflow pattern matching and transform
---
llvm/include/llvm/Transforms/Scalar/GVN.h | 4 +
llvm/lib/Transforms/Scalar/GVN.cpp | 122 ++++++++++++++++++
.../test/Transforms/GVN/PRE/rnflow-gvn-pre.ll | 59 +++++++++
3 files changed, 185 insertions(+)
create mode 100644 llvm/test/Transforms/GVN/PRE/rnflow-gvn-pre.ll
diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h
index bc0f108ac8260..b886140348e79 100644
--- a/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -22,6 +22,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
@@ -45,6 +46,7 @@ class FunctionPass;
class GetElementPtrInst;
class ImplicitControlFlowTracking;
class LoadInst;
+class SelectInst;
class LoopInfo;
class MemDepResult;
class MemoryAccess;
@@ -405,6 +407,8 @@ class GVNPass : public PassInfoMixin<GVNPass> {
void addDeadBlock(BasicBlock *BB);
void assignValNumForDeadCode();
void assignBlockRPONumber(Function &F);
+
+ bool optimizeMinMaxFindingSelectPattern(SelectInst *Select);
};
/// Create a legacy GVN pass.
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 72e1131a54a86..ca4b422bfb896 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2743,6 +2743,10 @@ bool GVNPass::processInstruction(Instruction *I) {
}
return Changed;
}
+ if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
+ if (optimizeMinMaxFindingSelectPattern(Select))
+ return true;
+ }
// Instructions with void type don't return a value, so there's
// no point in trying to find redundancies in them.
@@ -3330,6 +3334,124 @@ void GVNPass::assignValNumForDeadCode() {
}
}
+bool GVNPass::optimizeMinMaxFindingSelectPattern(SelectInst *Select) {
+ LLVM_DEBUG(
+ dbgs()
+ << "GVN: Analyzing select instruction for minimum finding pattern\n");
+ LLVM_DEBUG(dbgs() << "GVN: Select: " << *Select << "\n");
+ Value *Condition = Select->getCondition();
+ CmpInst *Comparison = dyn_cast<CmpInst>(Condition);
+ if (!Comparison) {
+ LLVM_DEBUG(dbgs() << "GVN: Condition is not a comparison\n");
+ return false;
+ }
+
+ // Check if this is ULT comparison.
+ CmpInst::Predicate Pred = Comparison->getPredicate();
+ if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT &&
+ Pred != CmpInst::FCMP_OLT && Pred != CmpInst::FCMP_ULT) {
+ LLVM_DEBUG(dbgs() << "GVN: Not a less-than comparison, predicate: " << Pred
+ << "\n");
+ return false;
+ }
+
+ // Check that both operands are loads.
+ Value *LHS = Comparison->getOperand(0);
+ Value *RHS = Comparison->getOperand(1);
+ if (!isa<LoadInst>(LHS) || !isa<LoadInst>(RHS)) {
+ LLVM_DEBUG(dbgs() << "GVN: Not both operands are loads\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "GVN: Found minimum finding pattern in Block: "
+ << Select->getParent()->getName() << "\n");
+
+ // Transform the pattern.
+ // Hoist the chain of operations for the second load to preheader.
+ // Get predecessor of the block containing the select instruction.
+ BasicBlock *BB = Select->getParent();
+
+ // Get preheader of the loop.
+ Loop *L = LI->getLoopFor(BB);
+ if (!L) {
+ LLVM_DEBUG(dbgs() << "GVN: Could not find loop\n");
+ return false;
+ }
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ LLVM_DEBUG(dbgs() << "GVN: Could not find loop preheader\n");
+ return false;
+ }
+
+ // Hoist the chain of operations for the second load to preheader.
+ // %90 = sext i32 %.05.i to i64
+ // %91 = getelementptr float, ptr %0, i64 %90 ; %0 + (sext i32 %85 to i64)*4
+ // %92 = getelementptr i8, ptr %91, i64 -4 ; %0 + (sext i32 %85 to i64)*4 - 4
+ // %93 = load float, ptr %92, align 4
+
+ Value *BasePtr = nullptr, *IndexVal = nullptr, *OffsetVal = nullptr;
+ IRBuilder<> Builder(Preheader->getTerminator());
+ if (match(RHS,
+ m_Load(m_GEP(m_GEP(m_Value(BasePtr), m_SExt(m_Value(IndexVal))),
+ m_Value(OffsetVal))))) {
+ LLVM_DEBUG(dbgs() << "GVN: Found pattern: " << *RHS << "\n");
+ LLVM_DEBUG(dbgs() << "GVN: Found pattern: " << "\n");
+
+ PHINode *Phi = dyn_cast<PHINode>(IndexVal);
+ if (!Phi) {
+ LLVM_DEBUG(dbgs() << "GVN: IndexVal is not a PHI node\n");
+ return false;
+ }
+ Value *InitialMinIndex = Phi->getIncomingValueForBlock(Preheader);
+
+ // Insert PHI node at the top of this block.
+ PHINode *KnownMinPhi =
+ PHINode::Create(Builder.getFloatTy(), 2, "known_min", BB->begin());
+
+ // Build the GEP chain in the preheader.
+ // 1. hoist_0 = sext i32 to i64
+ Value *HoistedSExt =
+ Builder.CreateSExt(InitialMinIndex, Builder.getInt64Ty(), "hoist_sext");
+
+ // 2. hoist_gep1 = getelementptr float, ptr BasePtr, i64 HoistedSExt
+ Value *HoistedGEP1 = Builder.CreateGEP(Builder.getFloatTy(), BasePtr,
+ HoistedSExt, "hoist_gep1");
+
+ // 3. hoist_gep2 = getelementptr i8, ptr HoistedGEP1, i64 OffsetVal
+ Value *HoistedGEP2 = Builder.CreateGEP(Builder.getInt8Ty(), HoistedGEP1,
+ OffsetVal, "hoist_gep2");
+
+ // 4. hoisted_load = load float, ptr HoistedGEP2
+ LoadInst *NewLoad =
+ Builder.CreateLoad(Builder.getFloatTy(), HoistedGEP2, "hoisted_load");
+
+ // Replace all uses of load with new load.
+ RHS->replaceAllUsesWith(NewLoad);
+ dyn_cast<LoadInst>(RHS)->eraseFromParent();
+
+ // Replace second operand of comparison with KnownMinPhi.
+ Comparison->setOperand(1, KnownMinPhi);
+
+ // Create new select instruction for selecting the minimum value.
+ IRBuilder<> SelectBuilder(BB->getTerminator());
+ SelectInst *CurrentMinSelect =
+ dyn_cast<SelectInst>(SelectBuilder.CreateSelect(
+ Comparison, LHS, KnownMinPhi, "current_min"));
+
+ // Populate PHI node.
+ KnownMinPhi->addIncoming(NewLoad, Preheader);
+ KnownMinPhi->addIncoming(CurrentMinSelect, BB);
+ LLVM_DEBUG(dbgs() << "Transformed the code\n");
+ return true;
+ } else {
+ LLVM_DEBUG(dbgs() << "GVN: Could not find pattern: " << *RHS << "\n");
+ LLVM_DEBUG(dbgs() << "GVN: Could not find pattern: " << "\n");
+ return false;
+ }
+ return false;
+}
+
+
class llvm::gvn::GVNLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
diff --git a/llvm/test/Transforms/GVN/PRE/rnflow-gvn-pre.ll b/llvm/test/Transforms/GVN/PRE/rnflow-gvn-pre.ll
new file mode 100644
index 0000000000000..6f17d4ab30240
--- /dev/null
+++ b/llvm/test/Transforms/GVN/PRE/rnflow-gvn-pre.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; Minimal test case containing only the .lr.ph.i basic block
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+
+define void @test_lr_ph_i(ptr %0) {
+; CHECK-LABEL: define void @test_lr_ph_i(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[HOIST_GEP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 1
+; CHECK-NEXT: [[HOIST_GEP2:%.*]] = getelementptr i8, ptr [[HOIST_GEP1]], i64 -4
+; CHECK-NEXT: [[HOISTED_LOAD:%.*]] = load float, ptr [[HOIST_GEP2]], align 4
+; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
+; CHECK: [[_LR_PH_I:.*:]]
+; CHECK-NEXT: [[KNOWN_MIN:%.*]] = phi float [ [[HOISTED_LOAD]], %[[ENTRY]] ], [ [[CURRENT_MIN:%.*]], %[[DOTLR_PH_I]] ]
+; CHECK-NEXT: [[INDVARS_IV_I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT_I:%.*]], %[[DOTLR_PH_I]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP10:%.*]], %[[DOTLR_PH_I]] ]
+; CHECK-NEXT: [[DOT05_I:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[DOT1_I:%.*]], %[[DOTLR_PH_I]] ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_I]] = add nsw i64 [[INDVARS_IV_I]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[INDVARS_IV_I]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -8
+; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[DOT05_I]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i64 -4
+; CHECK-NEXT: [[TMP8:%.*]] = fcmp contract olt float [[TMP4]], [[KNOWN_MIN]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_I]] to i32
+; CHECK-NEXT: [[DOT1_I]] = select i1 [[TMP8]], i32 [[TMP9]], i32 [[DOT05_I]]
+; CHECK-NEXT: [[TMP10]] = add nsw i64 [[TMP1]], -1
+; CHECK-NEXT: [[TMP11:%.*]] = icmp samesign ugt i64 [[TMP1]], 1
+; CHECK-NEXT: [[CURRENT_MIN]] = select i1 [[TMP8]], float [[TMP4]], float [[KNOWN_MIN]]
+; CHECK-NEXT: br i1 [[TMP11]], label %[[DOTLR_PH_I]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %.lr.ph.i
+
+.lr.ph.i: ; preds = %.lr.ph.i, %entry
+ %indvars.iv.i = phi i64 [ 1, %entry ], [ %indvars.iv.next.i, %.lr.ph.i ]
+ %86 = phi i64 [ 0, %entry ], [ %96, %.lr.ph.i ]
+ %.05.i = phi i32 [ 1, %entry ], [ %.1.i, %.lr.ph.i ]
+ %indvars.iv.next.i = add nsw i64 %indvars.iv.i, -1
+ %87 = getelementptr float, ptr %0, i64 %indvars.iv.i
+ %88 = getelementptr i8, ptr %87, i64 -8 ; first load : %0 + 4 * 1 - 8
+ %89 = load float, ptr %88, align 4
+ %90 = sext i32 %.05.i to i64
+ %91 = getelementptr float, ptr %0, i64 %90 ; %0 + 4 * 1
+ %92 = getelementptr i8, ptr %91, i64 -4 ; second load : %0 + 4 * 1 - 4
+ %93 = load float, ptr %92, align 4
+ %94 = fcmp contract olt float %89, %93
+ %95 = trunc nsw i64 %indvars.iv.next.i to i32
+ %.1.i = select i1 %94, i32 %95, i32 %.05.i
+ %96 = add nsw i64 %86, -1
+ %97 = icmp samesign ugt i64 %86, 1
+ br i1 %97, label %.lr.ph.i, label %exit
+
+exit:
+ ret void
+}
>From ea56b060128b72bea881ba04bdb5f59da1832cbe Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Sun, 9 Nov 2025 20:53:20 -0800
Subject: [PATCH 2/5] fixup! [GVN] Support rnflow pattern matching and transform
---
llvm/include/llvm/Transforms/Scalar/GVN.h | 9 +-
llvm/lib/Transforms/Scalar/GVN.cpp | 185 +++++++++---------
llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll | 58 ++++++
.../test/Transforms/GVN/PRE/rnflow-gvn-pre.ll | 59 ------
4 files changed, 160 insertions(+), 151 deletions(-)
create mode 100644 llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll
delete mode 100644 llvm/test/Transforms/GVN/PRE/rnflow-gvn-pre.ll
diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h
index b886140348e79..d598803be3585 100644
--- a/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -407,8 +407,13 @@ class GVNPass : public PassInfoMixin<GVNPass> {
void addDeadBlock(BasicBlock *BB);
void assignValNumForDeadCode();
void assignBlockRPONumber(Function &F);
-
- bool optimizeMinMaxFindingSelectPattern(SelectInst *Select);
+
+ bool recognizeMinFindingSelectPattern(SelectInst *Select);
+ bool transformMinFindingSelectPattern(Loop *L, BasicBlock *Preheader,
+ BasicBlock *BB, Value *LHS, Value *RHS,
+ CmpInst *Comparison, SelectInst *Select,
+ Value *BasePtr, Value *IndexVal,
+ Value *OffsetVal);
};
/// Create a legacy GVN pass.
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index ca4b422bfb896..69f8a87a36d0d 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2744,7 +2744,7 @@ bool GVNPass::processInstruction(Instruction *I) {
return Changed;
}
if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
- if (optimizeMinMaxFindingSelectPattern(Select))
+ if (recognizeMinFindingSelectPattern(Select))
return true;
}
@@ -3334,19 +3334,99 @@ void GVNPass::assignValNumForDeadCode() {
}
}
-bool GVNPass::optimizeMinMaxFindingSelectPattern(SelectInst *Select) {
+bool GVNPass::transformMinFindingSelectPattern(Loop *L, BasicBlock *Preheader,
+ BasicBlock *BB, Value *LHS,
+ Value *RHS, CmpInst *Comparison,
+ SelectInst *Select,
+ Value *BasePtr, Value *IndexVal,
+ Value *OffsetVal) {
+ // Hoist the chain of operations for the second load to preheader.
+ // %min.idx.ext = sext i32 %min.idx to i64
+ // %ptr.float.min = getelementptr float, ptr %0, i64 %min.idx.ext
+ // %ptr.second.load = getelementptr i8, ptr %ptr.float.min, i64 -4
+ // %val.current.min = load float, ptr %ptr.second.load, align 4
+ IRBuilder<> Builder(Preheader->getTerminator());
+
+ PHINode *Phi = dyn_cast<PHINode>(IndexVal);
+ if (!Phi) {
+ LLVM_DEBUG(dbgs() << "GVN: IndexVal is not a PHI node\n");
+ return false;
+ }
+
+ Value *InitialMinIndex = Phi->getIncomingValueForBlock(Preheader);
+
+ // Insert PHI node at the top of this block.
+ // This PHI node will be used to memoize the current minimum value so far.
+ PHINode *KnownMinPhi =
+ PHINode::Create(Builder.getFloatTy(), 2, "known_min", BB->begin());
+
+ // Hoist the load and build the necessary operations.
+ // 1. hoist_0 = sext i32 to i64
+ Value *HoistedSExt =
+ Builder.CreateSExt(InitialMinIndex, Builder.getInt64Ty(), "hoist_sext");
+
+ // 2. hoist_gep1 = getelementptr float, ptr BasePtr, i64 HoistedSExt
+ Value *HoistedGEP1 = Builder.CreateGEP(Builder.getFloatTy(), BasePtr,
+ HoistedSExt, "hoist_gep1");
+
+ // 3. hoist_gep2 = getelementptr i8, ptr HoistedGEP1, i64 OffsetVal
+ Value *HoistedGEP2 = Builder.CreateGEP(Builder.getInt8Ty(), HoistedGEP1,
+ OffsetVal, "hoist_gep2");
+
+ // 4. hoisted_load = load float, ptr HoistedGEP2
+ LoadInst *NewLoad =
+ Builder.CreateLoad(Builder.getFloatTy(), HoistedGEP2, "hoisted_load");
+
+ // Let the new load now take the place of the old load.
+ RHS->replaceAllUsesWith(NewLoad);
+ dyn_cast<LoadInst>(RHS)->eraseFromParent();
+
+ // Comparison should now compare the current value and the newly inserted
+ // PHI node.
+ Comparison->setOperand(1, KnownMinPhi);
+
+ // Create new select instruction for selecting the minimum value.
+ IRBuilder<> SelectBuilder(BB->getTerminator());
+ SelectInst *CurrentMinSelect = dyn_cast<SelectInst>(
+ SelectBuilder.CreateSelect(Comparison, LHS, KnownMinPhi, "current_min"));
+
+ // Populate the newly created PHI node
+ // with (hoisted) NewLoad from the preheader and CurrentMinSelect.
+ KnownMinPhi->addIncoming(NewLoad, Preheader);
+ KnownMinPhi->addIncoming(CurrentMinSelect, BB);
+ LLVM_DEBUG(dbgs() << "Transformed the code\n");
+ return true;
+}
+
+bool GVNPass::recognizeMinFindingSelectPattern(SelectInst *Select) {
+ Value *BasePtr = nullptr, *IndexVal = nullptr, *OffsetVal = nullptr;
LLVM_DEBUG(
dbgs()
- << "GVN: Analyzing select instruction for minimum finding pattern\n");
+ << "GVN: Analyzing select instruction for minimum finding pattern.\n");
LLVM_DEBUG(dbgs() << "GVN: Select: " << *Select << "\n");
+ BasicBlock *BB = Select->getParent();
+
+ // If the block is not in a loop, bail out.
+ Loop *L = LI->getLoopFor(BB);
+ if (!L) {
+ LLVM_DEBUG(dbgs() << "GVN: Could not find loop.\n");
+ return false;
+ }
+
+ // If preheader of the loop is not found, bail out.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ LLVM_DEBUG(dbgs() << "GVN: Could not find loop preheader.\n");
+ return false;
+ }
Value *Condition = Select->getCondition();
CmpInst *Comparison = dyn_cast<CmpInst>(Condition);
if (!Comparison) {
- LLVM_DEBUG(dbgs() << "GVN: Condition is not a comparison\n");
+ LLVM_DEBUG(dbgs() << "GVN: Condition is not a comparison.\n");
return false;
}
- // Check if this is ULT comparison.
+ // Check if this is less-than comparison.
CmpInst::Predicate Pred = Comparison->getPredicate();
if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT &&
Pred != CmpInst::FCMP_OLT && Pred != CmpInst::FCMP_ULT) {
@@ -3359,99 +3439,24 @@ bool GVNPass::optimizeMinMaxFindingSelectPattern(SelectInst *Select) {
Value *LHS = Comparison->getOperand(0);
Value *RHS = Comparison->getOperand(1);
if (!isa<LoadInst>(LHS) || !isa<LoadInst>(RHS)) {
- LLVM_DEBUG(dbgs() << "GVN: Not both operands are loads\n");
+ LLVM_DEBUG(dbgs() << "GVN: Not both operands are loads.\n");
return false;
}
- LLVM_DEBUG(dbgs() << "GVN: Found minimum finding pattern in Block: "
- << Select->getParent()->getName() << "\n");
-
- // Transform the pattern.
- // Hoist the chain of operations for the second load to preheader.
- // Get predecessor of the block containing the select instruction.
- BasicBlock *BB = Select->getParent();
-
- // Get preheader of the loop.
- Loop *L = LI->getLoopFor(BB);
- if (!L) {
- LLVM_DEBUG(dbgs() << "GVN: Could not find loop\n");
- return false;
- }
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) {
- LLVM_DEBUG(dbgs() << "GVN: Could not find loop preheader\n");
+ if (!match(RHS,
+ m_Load(m_GEP(m_GEP(m_Value(BasePtr), m_SExt(m_Value(IndexVal))),
+ m_Value(OffsetVal))))) {
+ LLVM_DEBUG(dbgs() << "GVN: Not a required load pattern.\n");
return false;
}
+ LLVM_DEBUG(dbgs() << "GVN: Found minimum finding pattern in Block: "
+ << Select->getParent()->getName() << ".\n");
- // Hoist the chain of operations for the second load to preheader.
- // %90 = sext i32 %.05.i to i64
- // %91 = getelementptr float, ptr %0, i64 %90 ; %0 + (sext i32 %85 to i64)*4
- // %92 = getelementptr i8, ptr %91, i64 -4 ; %0 + (sext i32 %85 to i64)*4 - 4
- // %93 = load float, ptr %92, align 4
-
- Value *BasePtr = nullptr, *IndexVal = nullptr, *OffsetVal = nullptr;
- IRBuilder<> Builder(Preheader->getTerminator());
- if (match(RHS,
- m_Load(m_GEP(m_GEP(m_Value(BasePtr), m_SExt(m_Value(IndexVal))),
- m_Value(OffsetVal))))) {
- LLVM_DEBUG(dbgs() << "GVN: Found pattern: " << *RHS << "\n");
- LLVM_DEBUG(dbgs() << "GVN: Found pattern: " << "\n");
-
- PHINode *Phi = dyn_cast<PHINode>(IndexVal);
- if (!Phi) {
- LLVM_DEBUG(dbgs() << "GVN: IndexVal is not a PHI node\n");
- return false;
- }
- Value *InitialMinIndex = Phi->getIncomingValueForBlock(Preheader);
-
- // Insert PHI node at the top of this block.
- PHINode *KnownMinPhi =
- PHINode::Create(Builder.getFloatTy(), 2, "known_min", BB->begin());
-
- // Build the GEP chain in the preheader.
- // 1. hoist_0 = sext i32 to i64
- Value *HoistedSExt =
- Builder.CreateSExt(InitialMinIndex, Builder.getInt64Ty(), "hoist_sext");
-
- // 2. hoist_gep1 = getelementptr float, ptr BasePtr, i64 HoistedSExt
- Value *HoistedGEP1 = Builder.CreateGEP(Builder.getFloatTy(), BasePtr,
- HoistedSExt, "hoist_gep1");
-
- // 3. hoist_gep2 = getelementptr i8, ptr HoistedGEP1, i64 OffsetVal
- Value *HoistedGEP2 = Builder.CreateGEP(Builder.getInt8Ty(), HoistedGEP1,
- OffsetVal, "hoist_gep2");
-
- // 4. hoisted_load = load float, ptr HoistedGEP2
- LoadInst *NewLoad =
- Builder.CreateLoad(Builder.getFloatTy(), HoistedGEP2, "hoisted_load");
-
- // Replace all uses of load with new load.
- RHS->replaceAllUsesWith(NewLoad);
- dyn_cast<LoadInst>(RHS)->eraseFromParent();
-
- // Replace second operand of comparison with KnownMinPhi.
- Comparison->setOperand(1, KnownMinPhi);
-
- // Create new select instruction for selecting the minimum value.
- IRBuilder<> SelectBuilder(BB->getTerminator());
- SelectInst *CurrentMinSelect =
- dyn_cast<SelectInst>(SelectBuilder.CreateSelect(
- Comparison, LHS, KnownMinPhi, "current_min"));
-
- // Populate PHI node.
- KnownMinPhi->addIncoming(NewLoad, Preheader);
- KnownMinPhi->addIncoming(CurrentMinSelect, BB);
- LLVM_DEBUG(dbgs() << "Transformed the code\n");
- return true;
- } else {
- LLVM_DEBUG(dbgs() << "GVN: Could not find pattern: " << *RHS << "\n");
- LLVM_DEBUG(dbgs() << "GVN: Could not find pattern: " << "\n");
- return false;
- }
- return false;
+ return transformMinFindingSelectPattern(L, Preheader, BB, LHS, RHS,
+ Comparison, Select, BasePtr, IndexVal,
+ OffsetVal);
}
-
class llvm::gvn::GVNLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
diff --git a/llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll b/llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll
new file mode 100644
index 0000000000000..33ba2e383bdf6
--- /dev/null
+++ b/llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+
+define void @test_gvn_min_pattern(ptr %0) {
+; CHECK-LABEL: define void @test_gvn_min_pattern(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[HOIST_GEP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 1
+; CHECK-NEXT: [[HOIST_GEP2:%.*]] = getelementptr i8, ptr [[HOIST_GEP1]], i64 -4
+; CHECK-NEXT: [[HOISTED_LOAD:%.*]] = load float, ptr [[HOIST_GEP2]], align 4
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[KNOWN_MIN:%.*]] = phi float [ [[HOISTED_LOAD]], %[[ENTRY]] ], [ [[CURRENT_MIN:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INDVARS_IV_I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT_I:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[LOOP_COUNTER:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[LOOP_COUNTER_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_I]] = add nsw i64 [[INDVARS_IV_I]], -1
+; CHECK-NEXT: [[PTR_FLOAT_IV:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[INDVARS_IV_I]]
+; CHECK-NEXT: [[PTR_FIRST_LOAD:%.*]] = getelementptr i8, ptr [[PTR_FLOAT_IV]], i64 -8
+; CHECK-NEXT: [[VAL_FIRST:%.*]] = load float, ptr [[PTR_FIRST_LOAD]], align 4
+; CHECK-NEXT: [[MIN_IDX_EXT:%.*]] = sext i32 [[MIN_IDX]] to i64
+; CHECK-NEXT: [[PTR_FLOAT_MIN:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[MIN_IDX_EXT]]
+; CHECK-NEXT: [[PTR_SECOND_LOAD:%.*]] = getelementptr i8, ptr [[PTR_FLOAT_MIN]], i64 -4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp contract olt float [[VAL_FIRST]], [[KNOWN_MIN]]
+; CHECK-NEXT: [[NEXT_IDX_TRUNC:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_I]] to i32
+; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i32 [[NEXT_IDX_TRUNC]], i32 [[MIN_IDX]]
+; CHECK-NEXT: [[LOOP_COUNTER_NEXT]] = add nsw i64 [[LOOP_COUNTER]], -1
+; CHECK-NEXT: [[LOOP_CONTINUE:%.*]] = icmp samesign ugt i64 [[LOOP_COUNTER]], 1
+; CHECK-NEXT: [[CURRENT_MIN]] = select i1 [[CMP]], float [[VAL_FIRST]], float [[KNOWN_MIN]]
+; CHECK-NEXT: br i1 [[LOOP_CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %indvars.iv.i = phi i64 [ 1, %entry ], [ %indvars.iv.next.i, %loop ]
+ %loop.counter = phi i64 [ 0, %entry ], [ %loop.counter.next, %loop ]
+ %min.idx = phi i32 [ 1, %entry ], [ %min.idx.next, %loop ]
+ %indvars.iv.next.i = add nsw i64 %indvars.iv.i, -1
+ %ptr.float.iv = getelementptr float, ptr %0, i64 %indvars.iv.i
+ %ptr.first.load = getelementptr i8, ptr %ptr.float.iv, i64 -8
+ %val.first = load float, ptr %ptr.first.load, align 4
+ %min.idx.ext = sext i32 %min.idx to i64
+ %ptr.float.min = getelementptr float, ptr %0, i64 %min.idx.ext
+ %ptr.second.load = getelementptr i8, ptr %ptr.float.min, i64 -4
+ %val.current.min = load float, ptr %ptr.second.load, align 4
+ %cmp = fcmp contract olt float %val.first, %val.current.min
+ %next.idx.trunc = trunc nsw i64 %indvars.iv.next.i to i32
+ %min.idx.next = select i1 %cmp, i32 %next.idx.trunc, i32 %min.idx
+ %loop.counter.next = add nsw i64 %loop.counter, -1
+ %loop.continue = icmp samesign ugt i64 %loop.counter, 1
+ br i1 %loop.continue, label %loop, label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/GVN/PRE/rnflow-gvn-pre.ll b/llvm/test/Transforms/GVN/PRE/rnflow-gvn-pre.ll
deleted file mode 100644
index 6f17d4ab30240..0000000000000
--- a/llvm/test/Transforms/GVN/PRE/rnflow-gvn-pre.ll
+++ /dev/null
@@ -1,59 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; Minimal test case containing only the .lr.ph.i basic block
-; RUN: opt -passes=gvn -S < %s | FileCheck %s
-
-define void @test_lr_ph_i(ptr %0) {
-; CHECK-LABEL: define void @test_lr_ph_i(
-; CHECK-SAME: ptr [[TMP0:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[HOIST_GEP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 1
-; CHECK-NEXT: [[HOIST_GEP2:%.*]] = getelementptr i8, ptr [[HOIST_GEP1]], i64 -4
-; CHECK-NEXT: [[HOISTED_LOAD:%.*]] = load float, ptr [[HOIST_GEP2]], align 4
-; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]]
-; CHECK: [[_LR_PH_I:.*:]]
-; CHECK-NEXT: [[KNOWN_MIN:%.*]] = phi float [ [[HOISTED_LOAD]], %[[ENTRY]] ], [ [[CURRENT_MIN:%.*]], %[[DOTLR_PH_I]] ]
-; CHECK-NEXT: [[INDVARS_IV_I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT_I:%.*]], %[[DOTLR_PH_I]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP10:%.*]], %[[DOTLR_PH_I]] ]
-; CHECK-NEXT: [[DOT05_I:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[DOT1_I:%.*]], %[[DOTLR_PH_I]] ]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_I]] = add nsw i64 [[INDVARS_IV_I]], -1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[INDVARS_IV_I]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -8
-; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[DOT05_I]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i64 -4
-; CHECK-NEXT: [[TMP8:%.*]] = fcmp contract olt float [[TMP4]], [[KNOWN_MIN]]
-; CHECK-NEXT: [[TMP9:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_I]] to i32
-; CHECK-NEXT: [[DOT1_I]] = select i1 [[TMP8]], i32 [[TMP9]], i32 [[DOT05_I]]
-; CHECK-NEXT: [[TMP10]] = add nsw i64 [[TMP1]], -1
-; CHECK-NEXT: [[TMP11:%.*]] = icmp samesign ugt i64 [[TMP1]], 1
-; CHECK-NEXT: [[CURRENT_MIN]] = select i1 [[TMP8]], float [[TMP4]], float [[KNOWN_MIN]]
-; CHECK-NEXT: br i1 [[TMP11]], label %[[DOTLR_PH_I]], label %[[EXIT:.*]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
-;
-entry:
- br label %.lr.ph.i
-
-.lr.ph.i: ; preds = %.lr.ph.i, %entry
- %indvars.iv.i = phi i64 [ 1, %entry ], [ %indvars.iv.next.i, %.lr.ph.i ]
- %86 = phi i64 [ 0, %entry ], [ %96, %.lr.ph.i ]
- %.05.i = phi i32 [ 1, %entry ], [ %.1.i, %.lr.ph.i ]
- %indvars.iv.next.i = add nsw i64 %indvars.iv.i, -1
- %87 = getelementptr float, ptr %0, i64 %indvars.iv.i
- %88 = getelementptr i8, ptr %87, i64 -8 ; first load : %0 + 4 * 1 - 8
- %89 = load float, ptr %88, align 4
- %90 = sext i32 %.05.i to i64
- %91 = getelementptr float, ptr %0, i64 %90 ; %0 + 4 * 1
- %92 = getelementptr i8, ptr %91, i64 -4 ; second load : %0 + 4 * 1 - 4
- %93 = load float, ptr %92, align 4
- %94 = fcmp contract olt float %89, %93
- %95 = trunc nsw i64 %indvars.iv.next.i to i32
- %.1.i = select i1 %94, i32 %95, i32 %.05.i
- %96 = add nsw i64 %86, -1
- %97 = icmp samesign ugt i64 %86, 1
- br i1 %97, label %.lr.ph.i, label %exit
-
-exit:
- ret void
-}
>From 153ee85fd35224a5d177d537208db10704ce6ca7 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Thu, 13 Nov 2025 01:58:25 -0800
Subject: [PATCH 3/5] fixup! generalize load type.
---
llvm/include/llvm/Transforms/Scalar/GVN.h | 2 +-
llvm/lib/Transforms/Scalar/GVN.cpp | 13 ++++++++-----
llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll | 4 ++--
3 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h
index d598803be3585..3a56938f60d45 100644
--- a/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -409,7 +409,7 @@ class GVNPass : public PassInfoMixin<GVNPass> {
void assignBlockRPONumber(Function &F);
bool recognizeMinFindingSelectPattern(SelectInst *Select);
- bool transformMinFindingSelectPattern(Loop *L, BasicBlock *Preheader,
+ bool transformMinFindingSelectPattern(Loop *L, Type *LoadType, BasicBlock *Preheader,
BasicBlock *BB, Value *LHS, Value *RHS,
CmpInst *Comparison, SelectInst *Select,
Value *BasePtr, Value *IndexVal,
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 69f8a87a36d0d..26b98645e6e87 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -3334,7 +3334,7 @@ void GVNPass::assignValNumForDeadCode() {
}
}
-bool GVNPass::transformMinFindingSelectPattern(Loop *L, BasicBlock *Preheader,
+bool GVNPass::transformMinFindingSelectPattern(Loop *L, Type *LoadType, BasicBlock *Preheader,
BasicBlock *BB, Value *LHS,
Value *RHS, CmpInst *Comparison,
SelectInst *Select,
@@ -3358,7 +3358,7 @@ bool GVNPass::transformMinFindingSelectPattern(Loop *L, BasicBlock *Preheader,
// Insert PHI node at the top of this block.
// This PHI node will be used to memoize the current minimum value so far.
PHINode *KnownMinPhi =
- PHINode::Create(Builder.getFloatTy(), 2, "known_min", BB->begin());
+ PHINode::Create(LoadType, 2, "known_min", BB->begin());
// Hoist the load and build the necessary operations.
// 1. hoist_0 = sext i32 to i64
@@ -3366,7 +3366,7 @@ bool GVNPass::transformMinFindingSelectPattern(Loop *L, BasicBlock *Preheader,
Builder.CreateSExt(InitialMinIndex, Builder.getInt64Ty(), "hoist_sext");
// 2. hoist_gep1 = getelementptr float, ptr BasePtr, i64 HoistedSExt
- Value *HoistedGEP1 = Builder.CreateGEP(Builder.getFloatTy(), BasePtr,
+ Value *HoistedGEP1 = Builder.CreateGEP(LoadType, BasePtr,
HoistedSExt, "hoist_gep1");
// 3. hoist_gep2 = getelementptr i8, ptr HoistedGEP1, i64 OffsetVal
@@ -3375,7 +3375,7 @@ bool GVNPass::transformMinFindingSelectPattern(Loop *L, BasicBlock *Preheader,
// 4. hoisted_load = load float, ptr HoistedGEP2
LoadInst *NewLoad =
- Builder.CreateLoad(Builder.getFloatTy(), HoistedGEP2, "hoisted_load");
+ Builder.CreateLoad(LoadType, HoistedGEP2, "hoisted_load");
// Let the new load now take the place of the old load.
RHS->replaceAllUsesWith(NewLoad);
@@ -3452,7 +3452,10 @@ bool GVNPass::recognizeMinFindingSelectPattern(SelectInst *Select) {
LLVM_DEBUG(dbgs() << "GVN: Found minimum finding pattern in Block: "
<< Select->getParent()->getName() << ".\n");
- return transformMinFindingSelectPattern(L, Preheader, BB, LHS, RHS,
+ // Get type of load.
+ Type *LoadType = dyn_cast<LoadInst>(LHS)->getType();
+ LLVM_DEBUG(dbgs() << "GVN: Transforming minimum finding pattern.\n");
+ return transformMinFindingSelectPattern(L, LoadType,Preheader, BB, LHS, RHS,
Comparison, Select, BasePtr, IndexVal,
OffsetVal);
}
diff --git a/llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll b/llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll
index 33ba2e383bdf6..19fec514b28fe 100644
--- a/llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll
+++ b/llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll
@@ -40,11 +40,11 @@ loop: ; preds = %loop, %entry
%min.idx = phi i32 [ 1, %entry ], [ %min.idx.next, %loop ]
%indvars.iv.next.i = add nsw i64 %indvars.iv.i, -1
%ptr.float.iv = getelementptr float, ptr %0, i64 %indvars.iv.i
- %ptr.first.load = getelementptr i8, ptr %ptr.float.iv, i64 -8
+ %ptr.first.load = getelementptr i8, ptr %ptr.float.iv, i64 -8
%val.first = load float, ptr %ptr.first.load, align 4
%min.idx.ext = sext i32 %min.idx to i64
%ptr.float.min = getelementptr float, ptr %0, i64 %min.idx.ext
- %ptr.second.load = getelementptr i8, ptr %ptr.float.min, i64 -4
+ %ptr.second.load = getelementptr i8, ptr %ptr.float.min, i64 -4
%val.current.min = load float, ptr %ptr.second.load, align 4
%cmp = fcmp contract olt float %val.first, %val.current.min
%next.idx.trunc = trunc nsw i64 %indvars.iv.next.i to i32
>From 61252862834f61fcbcefb48e6e7d4e343de16094 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Thu, 13 Nov 2025 01:59:49 -0800
Subject: [PATCH 4/5] fixup! clang-format
---
llvm/include/llvm/Transforms/Scalar/GVN.h | 5 +++--
llvm/lib/Transforms/Scalar/GVN.cpp | 22 +++++++++-------------
2 files changed, 12 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h
index 3a56938f60d45..c35f9a03112cb 100644
--- a/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -409,8 +409,9 @@ class GVNPass : public PassInfoMixin<GVNPass> {
void assignBlockRPONumber(Function &F);
bool recognizeMinFindingSelectPattern(SelectInst *Select);
- bool transformMinFindingSelectPattern(Loop *L, Type *LoadType, BasicBlock *Preheader,
- BasicBlock *BB, Value *LHS, Value *RHS,
+ bool transformMinFindingSelectPattern(Loop *L, Type *LoadType,
+ BasicBlock *Preheader, BasicBlock *BB,
+ Value *LHS, Value *RHS,
CmpInst *Comparison, SelectInst *Select,
Value *BasePtr, Value *IndexVal,
Value *OffsetVal);
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 26b98645e6e87..c41518f874623 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -3334,12 +3334,10 @@ void GVNPass::assignValNumForDeadCode() {
}
}
-bool GVNPass::transformMinFindingSelectPattern(Loop *L, Type *LoadType, BasicBlock *Preheader,
- BasicBlock *BB, Value *LHS,
- Value *RHS, CmpInst *Comparison,
- SelectInst *Select,
- Value *BasePtr, Value *IndexVal,
- Value *OffsetVal) {
+bool GVNPass::transformMinFindingSelectPattern(
+ Loop *L, Type *LoadType, BasicBlock *Preheader, BasicBlock *BB, Value *LHS,
+ Value *RHS, CmpInst *Comparison, SelectInst *Select, Value *BasePtr,
+ Value *IndexVal, Value *OffsetVal) {
// Hoist the chain of operations for the second load to preheader.
// %min.idx.ext = sext i32 %min.idx to i64
// %ptr.float.min = getelementptr float, ptr %0, i64 %min.idx.ext
@@ -3357,8 +3355,7 @@ bool GVNPass::transformMinFindingSelectPattern(Loop *L, Type *LoadType, BasicBlo
// Insert PHI node at the top of this block.
// This PHI node will be used to memoize the current minimum value so far.
- PHINode *KnownMinPhi =
- PHINode::Create(LoadType, 2, "known_min", BB->begin());
+ PHINode *KnownMinPhi = PHINode::Create(LoadType, 2, "known_min", BB->begin());
// Hoist the load and build the necessary operations.
// 1. hoist_0 = sext i32 to i64
@@ -3366,16 +3363,15 @@ bool GVNPass::transformMinFindingSelectPattern(Loop *L, Type *LoadType, BasicBlo
Builder.CreateSExt(InitialMinIndex, Builder.getInt64Ty(), "hoist_sext");
// 2. hoist_gep1 = getelementptr float, ptr BasePtr, i64 HoistedSExt
- Value *HoistedGEP1 = Builder.CreateGEP(LoadType, BasePtr,
- HoistedSExt, "hoist_gep1");
+ Value *HoistedGEP1 =
+ Builder.CreateGEP(LoadType, BasePtr, HoistedSExt, "hoist_gep1");
// 3. hoist_gep2 = getelementptr i8, ptr HoistedGEP1, i64 OffsetVal
Value *HoistedGEP2 = Builder.CreateGEP(Builder.getInt8Ty(), HoistedGEP1,
OffsetVal, "hoist_gep2");
// 4. hoisted_load = load float, ptr HoistedGEP2
- LoadInst *NewLoad =
- Builder.CreateLoad(LoadType, HoistedGEP2, "hoisted_load");
+ LoadInst *NewLoad = Builder.CreateLoad(LoadType, HoistedGEP2, "hoisted_load");
// Let the new load now take the place of the old load.
RHS->replaceAllUsesWith(NewLoad);
@@ -3455,7 +3451,7 @@ bool GVNPass::recognizeMinFindingSelectPattern(SelectInst *Select) {
// Get type of load.
Type *LoadType = dyn_cast<LoadInst>(LHS)->getType();
LLVM_DEBUG(dbgs() << "GVN: Transforming minimum finding pattern.\n");
- return transformMinFindingSelectPattern(L, LoadType,Preheader, BB, LHS, RHS,
+ return transformMinFindingSelectPattern(L, LoadType, Preheader, BB, LHS, RHS,
Comparison, Select, BasePtr, IndexVal,
OffsetVal);
}
>From ba3ce9fa0651edeaefdec1ee283175396ab773c5 Mon Sep 17 00:00:00 2001
From: Madhur Amilkanthwar <madhura at nvidia.com>
Date: Wed, 10 Dec 2025 02:09:20 -0800
Subject: [PATCH 5/5] fixup! address review comments
1. Added 10+ negative tests.
2. Added positive tests for non-float types.
3. Strengthening checks in recognize function
4. clang-format changes
5. Addressed other review comments
---
llvm/include/llvm/Transforms/Scalar/GVN.h | 1 +
llvm/lib/Transforms/Scalar/GVN.cpp | 170 ++++-
llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll | 676 +++++++++++++++++++-
3 files changed, 810 insertions(+), 37 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h
index c35f9a03112cb..db74c2066013c 100644
--- a/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -46,6 +46,7 @@ class FunctionPass;
class GetElementPtrInst;
class ImplicitControlFlowTracking;
class LoadInst;
+class Loop;
class SelectInst;
class LoopInfo;
class MemDepResult;
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index c41518f874623..b514210c8cc1c 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -34,7 +34,6 @@
#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
@@ -3334,31 +3333,64 @@ void GVNPass::assignValNumForDeadCode() {
}
}
+// Hoist the chain of operations for the second load to preheader.
+// In this transformation, we hoist the redundant load to the preheader,
+// caching the first value of the iteration. This value is used to compare with
+// the current value of the iteration and update the minimum value.
+// The comparison is done in the loop body using the new select instruction.
+//
+// *** Before transformation ***
+//
+// preheader:
+// ...
+// loop:
+// ...
+// ...
+// %val.first = load <TYPE>, ptr %ptr.first.load, align 4
+// %min.idx.ext = sext i32 %min.idx to i64
+// %ptr.<TYPE>.min = getelementptr <TYPE>, ptr %0, i64 %min.idx.ext
+// %ptr.second.load = getelementptr i8, ptr %ptr.<TYPE>.min, i64 -4
+// %val.current.min = load <TYPE>, ptr %ptr.second.load, align 4
+// ...
+// ...
+// br i1 %cond, label %loop, label %exit
+//
+// *** After transformation ***
+//
+// preheader:
+// %hoist_sext = sext i32 %initial_min_idx to i64
+// %hoist_gep1 = getelementptr <TYPE>, ptr %0, i64 %hoist_sext
+// %hoist_gep2 = getelementptr i8, ptr %hoist_gep1, i64 -4
+// %hoisted_load = load <TYPE>, ptr %hoist_gep2, align 4
+// br label %loop
+//
+// loop:
+// %val.first = load <TYPE>, ptr %ptr.first.load, align 4
+// ...
+// (new) %current_min = select i1 %cmp, <TYPE> %val.first, <TYPE>
+// %known_min
+// ...
+// ...
+// br i1 %cond, label %loop, label %exit
bool GVNPass::transformMinFindingSelectPattern(
Loop *L, Type *LoadType, BasicBlock *Preheader, BasicBlock *BB, Value *LHS,
- Value *RHS, CmpInst *Comparison, SelectInst *Select, Value *BasePtr,
+ Value *LoadVal, CmpInst *Comparison, SelectInst *Select, Value *BasePtr,
Value *IndexVal, Value *OffsetVal) {
- // Hoist the chain of operations for the second load to preheader.
- // %min.idx.ext = sext i32 %min.idx to i64
- // %ptr.float.min = getelementptr float, ptr %0, i64 %min.idx.ext
- // %ptr.second.load = getelementptr i8, ptr %ptr.float.min, i64 -4
- // %val.current.min = load float, ptr %ptr.second.load, align 4
- IRBuilder<> Builder(Preheader->getTerminator());
- PHINode *Phi = dyn_cast<PHINode>(IndexVal);
- if (!Phi) {
- LLVM_DEBUG(dbgs() << "GVN: IndexVal is not a PHI node\n");
- return false;
- }
+ assert(IndexVal && "IndexVal is null");
+ AAResults *AA = VN.getAliasAnalysis();
+ assert(AA && "AA is null");
- Value *InitialMinIndex = Phi->getIncomingValueForBlock(Preheader);
+ IRBuilder<> Builder(Preheader->getTerminator());
+ Value *InitialMinIndex =
+ dyn_cast<PHINode>(IndexVal)->getIncomingValueForBlock(Preheader);
// Insert PHI node at the top of this block.
// This PHI node will be used to memoize the current minimum value so far.
PHINode *KnownMinPhi = PHINode::Create(LoadType, 2, "known_min", BB->begin());
// Hoist the load and build the necessary operations.
- // 1. hoist_0 = sext i32 to i64
+ // 1. hoist_sext = sext i32 InitialMinIndex to i64
Value *HoistedSExt =
Builder.CreateSExt(InitialMinIndex, Builder.getInt64Ty(), "hoist_sext");
@@ -3370,12 +3402,40 @@ bool GVNPass::transformMinFindingSelectPattern(
Value *HoistedGEP2 = Builder.CreateGEP(Builder.getInt8Ty(), HoistedGEP1,
OffsetVal, "hoist_gep2");
+ MemoryLocation NewLoc = MemoryLocation(
+ HoistedGEP2,
+ LocationSize::precise(
+ L->getHeader()->getDataLayout().getTypeStoreSize(LoadType)));
+ // Check if any instruction in the loop clobbers this location.
+ bool CanHoist = true;
+ for (BasicBlock *BB : L->blocks()) {
+ for (Instruction &I : *BB) {
+ if (I.mayWriteToMemory()) {
+ // Check if this instruction may clobber our hoisted load.
+ ModRefInfo MRI = AA->getModRefInfo(&I, NewLoc);
+ if (isModOrRefSet(MRI)) {
+ LLVM_DEBUG(dbgs() << "GVN: Cannot hoist - may be clobbered by: " << I
+ << "\n");
+ CanHoist = false;
+ break;
+ }
+ }
+ }
+ if (!CanHoist)
+ break;
+ }
+ if (!CanHoist) {
+ LLVM_DEBUG(dbgs() << "GVN: Cannot hoist - may be clobbered by some "
+ "instruction in the loop.\n");
+ return false;
+ }
+
// 4. hoisted_load = load float, ptr HoistedGEP2
LoadInst *NewLoad = Builder.CreateLoad(LoadType, HoistedGEP2, "hoisted_load");
// Let the new load now take the place of the old load.
- RHS->replaceAllUsesWith(NewLoad);
- dyn_cast<LoadInst>(RHS)->eraseFromParent();
+ LoadVal->replaceAllUsesWith(NewLoad);
+ dyn_cast<LoadInst>(LoadVal)->eraseFromParent();
// Comparison should now compare the current value and the newly inserted
// PHI node.
@@ -3390,16 +3450,42 @@ bool GVNPass::transformMinFindingSelectPattern(
// with (hoisted) NewLoad from the preheader and CurrentMinSelect.
KnownMinPhi->addIncoming(NewLoad, Preheader);
KnownMinPhi->addIncoming(CurrentMinSelect, BB);
- LLVM_DEBUG(dbgs() << "Transformed the code\n");
+
+ if (MSSAU) {
+ auto *OrigUse =
+ MSSAU->getMemorySSA()->getMemoryAccess(dyn_cast<Instruction>(LoadVal));
+ if (OrigUse) {
+ MemoryAccess *DefiningAccess = OrigUse->getDefiningAccess();
+ MSSAU->createMemoryAccessInBB(NewLoad, DefiningAccess, Preheader,
+ MemorySSA::BeforeTerminator);
+ }
+ }
+ LLVM_DEBUG(
+ dbgs() << "GVN: Transformed the code for minimum finding pattern.\n");
return true;
}
+// We are looking for the following pattern:
+// loop:
+// ...
+// ...
+// %min.idx = phi i32 [ %initial_min_idx, %entry ], [ %min.idx.next, %loop ]
+// ...
+// %val.first = load <TYPE>, ptr %ptr.first.load, align 4
+// %min.idx.ext = sext i32 %min.idx to i64
+// %ptr.<TYPE>.min = getelementptr <TYPE>, ptr %0, i64 %min.idx.ext
+// %ptr.second.load = getelementptr i8, ptr %ptr.<TYPE>.min, i64 -4
+// %val.current.min = load <TYPE>, ptr %ptr.second.load, align 4
+// %cmp = <CMP_INST> <TYPE> %val.first, %val.current.min
+// ...
+// %min.idx.next = select i1 %cmp, ..., i32 %min.idx
+// ...
+// ...
+// br i1 ..., label %loop, ...
bool GVNPass::recognizeMinFindingSelectPattern(SelectInst *Select) {
- Value *BasePtr = nullptr, *IndexVal = nullptr, *OffsetVal = nullptr;
- LLVM_DEBUG(
- dbgs()
- << "GVN: Analyzing select instruction for minimum finding pattern.\n");
- LLVM_DEBUG(dbgs() << "GVN: Select: " << *Select << "\n");
+ IRBuilder<> Builder(Select);
+ Value *BasePtr = nullptr, *IndexVal = nullptr, *OffsetVal = nullptr,
+ *SExt = nullptr;
BasicBlock *BB = Select->getParent();
// If the block is not in a loop, bail out.
@@ -3439,21 +3525,41 @@ bool GVNPass::recognizeMinFindingSelectPattern(SelectInst *Select) {
return false;
}
- if (!match(RHS,
- m_Load(m_GEP(m_GEP(m_Value(BasePtr), m_SExt(m_Value(IndexVal))),
- m_Value(OffsetVal))))) {
+ if (!match(RHS, m_Load(m_GEP(m_GEP(m_Value(BasePtr), m_Value(SExt)),
+ m_Value(OffsetVal))))) {
LLVM_DEBUG(dbgs() << "GVN: Not a required load pattern.\n");
return false;
}
+ // The inner GEP's index must be produced by a sext instruction.
+ SExtInst *SEInst = dyn_cast<SExtInst>(SExt);
+ if (!SEInst) {
+ LLVM_DEBUG(dbgs() << "GVN: not a sext instruction.\n");
+ return false;
+ }
+ // Check if the "To" and "from" type of the sext instruction are i64 and i32
+ // respectively.
+ if (SEInst->getType() != Builder.getInt64Ty() ||
+ SEInst->getOperand(0)->getType() != Builder.getInt32Ty()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "GVN: Not matching the required type for sext instruction.\n");
+ return false;
+ }
+
+ IndexVal = SEInst->getOperand(0);
+ // Check if the IndexVal is a PHI node.
+ PHINode *Phi = dyn_cast<PHINode>(IndexVal);
+ if (!Phi) {
+ LLVM_DEBUG(dbgs() << "GVN: IndexVal is not a PHI node\n");
+ return false;
+ }
+
LLVM_DEBUG(dbgs() << "GVN: Found minimum finding pattern in Block: "
<< Select->getParent()->getName() << ".\n");
- // Get type of load.
- Type *LoadType = dyn_cast<LoadInst>(LHS)->getType();
- LLVM_DEBUG(dbgs() << "GVN: Transforming minimum finding pattern.\n");
- return transformMinFindingSelectPattern(L, LoadType, Preheader, BB, LHS, RHS,
- Comparison, Select, BasePtr, IndexVal,
- OffsetVal);
+ return transformMinFindingSelectPattern(L, dyn_cast<LoadInst>(LHS)->getType(),
+ Preheader, BB, LHS, RHS, Comparison,
+ Select, BasePtr, IndexVal, OffsetVal);
}
class llvm::gvn::GVNLegacyPass : public FunctionPass {
diff --git a/llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll b/llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll
index 19fec514b28fe..1435417f80710 100644
--- a/llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll
+++ b/llvm/test/Transforms/GVN/PRE/gvn-min-pre.ll
@@ -1,11 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=gvn -S < %s | FileCheck %s
-define void @test_gvn_min_pattern(ptr %0) {
+; Test the minimum finding pattern.
+; The following test case is extracted from the rnflow app in the Polyhedron benchmark suite.
+define void @test_gvn_min_pattern(ptr %0, i32 %initial_min_idx) {
; CHECK-LABEL: define void @test_gvn_min_pattern(
-; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[INITIAL_MIN_IDX:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[HOIST_GEP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 1
+; CHECK-NEXT: [[HOIST_SEXT:%.*]] = sext i32 [[INITIAL_MIN_IDX]] to i64
+; CHECK-NEXT: [[HOIST_GEP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[HOIST_SEXT]]
; CHECK-NEXT: [[HOIST_GEP2:%.*]] = getelementptr i8, ptr [[HOIST_GEP1]], i64 -4
; CHECK-NEXT: [[HOISTED_LOAD:%.*]] = load float, ptr [[HOIST_GEP2]], align 4
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -13,7 +16,7 @@ define void @test_gvn_min_pattern(ptr %0) {
; CHECK-NEXT: [[KNOWN_MIN:%.*]] = phi float [ [[HOISTED_LOAD]], %[[ENTRY]] ], [ [[CURRENT_MIN:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[INDVARS_IV_I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT_I:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[LOOP_COUNTER:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[LOOP_COUNTER_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i32 [ [[INITIAL_MIN_IDX]], %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[INDVARS_IV_NEXT_I]] = add nsw i64 [[INDVARS_IV_I]], -1
; CHECK-NEXT: [[PTR_FLOAT_IV:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[INDVARS_IV_I]]
; CHECK-NEXT: [[PTR_FIRST_LOAD:%.*]] = getelementptr i8, ptr [[PTR_FLOAT_IV]], i64 -8
@@ -37,7 +40,7 @@ entry:
loop: ; preds = %loop, %entry
%indvars.iv.i = phi i64 [ 1, %entry ], [ %indvars.iv.next.i, %loop ]
%loop.counter = phi i64 [ 0, %entry ], [ %loop.counter.next, %loop ]
- %min.idx = phi i32 [ 1, %entry ], [ %min.idx.next, %loop ]
+ %min.idx = phi i32 [ %initial_min_idx, %entry ], [ %min.idx.next, %loop ]
%indvars.iv.next.i = add nsw i64 %indvars.iv.i, -1
%ptr.float.iv = getelementptr float, ptr %0, i64 %indvars.iv.i
%ptr.first.load = getelementptr i8, ptr %ptr.float.iv, i64 -8
@@ -56,3 +59,666 @@ loop: ; preds = %loop, %entry
exit:
ret void
}
+
+; Positive test: Minimum finding pattern with i32 loads.
+define void @test_gvn_min_pattern_i32(ptr %arr, i32 %initial_min_idx) {
+; CHECK-LABEL: define void @test_gvn_min_pattern_i32(
+; CHECK-SAME: ptr [[ARR:%.*]], i32 [[INITIAL_MIN_IDX:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[HOIST_SEXT:%.*]] = sext i32 [[INITIAL_MIN_IDX]] to i64
+; CHECK-NEXT: [[HOIST_GEP1:%.*]] = getelementptr i32, ptr [[ARR]], i64 [[HOIST_SEXT]]
+; CHECK-NEXT: [[HOIST_GEP2:%.*]] = getelementptr i8, ptr [[HOIST_GEP1]], i64 -4
+; CHECK-NEXT: [[HOISTED_LOAD:%.*]] = load i32, ptr [[HOIST_GEP2]], align 4
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[KNOWN_MIN:%.*]] = phi i32 [ [[HOISTED_LOAD]], %[[ENTRY]] ], [ [[CURRENT_MIN:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INDVARS_IV_I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT_I:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[LOOP_COUNTER:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[LOOP_COUNTER_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i32 [ [[INITIAL_MIN_IDX]], %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_I]] = add nsw i64 [[INDVARS_IV_I]], -1
+; CHECK-NEXT: [[PTR_FLOAT_IV:%.*]] = getelementptr i32, ptr [[ARR]], i64 [[INDVARS_IV_I]]
+; CHECK-NEXT: [[PTR_FIRST_LOAD:%.*]] = getelementptr i8, ptr [[PTR_FLOAT_IV]], i64 -8
+; CHECK-NEXT: [[VAL_FIRST:%.*]] = load i32, ptr [[PTR_FIRST_LOAD]], align 4
+; CHECK-NEXT: [[MIN_IDX_EXT:%.*]] = sext i32 [[MIN_IDX]] to i64
+; CHECK-NEXT: [[PTR_FLOAT_MIN:%.*]] = getelementptr i32, ptr [[ARR]], i64 [[MIN_IDX_EXT]]
+; CHECK-NEXT: [[PTR_SECOND_LOAD:%.*]] = getelementptr i8, ptr [[PTR_FLOAT_MIN]], i64 -4
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VAL_FIRST]], [[KNOWN_MIN]]
+; CHECK-NEXT: [[NEXT_IDX_TRUNC:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_I]] to i32
+; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i32 [[NEXT_IDX_TRUNC]], i32 [[MIN_IDX]]
+; CHECK-NEXT: [[LOOP_COUNTER_NEXT]] = add nsw i64 [[LOOP_COUNTER]], -1
+; CHECK-NEXT: [[LOOP_CONTINUE:%.*]] = icmp samesign ugt i64 [[LOOP_COUNTER]], 1
+; CHECK-NEXT: [[CURRENT_MIN]] = select i1 [[CMP]], i32 [[VAL_FIRST]], i32 [[KNOWN_MIN]]
+; CHECK-NEXT: br i1 [[LOOP_CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %indvars.iv.i = phi i64 [ 1, %entry ], [ %indvars.iv.next.i, %loop ]
+ %loop.counter = phi i64 [ 0, %entry ], [ %loop.counter.next, %loop ]
+ %min.idx = phi i32 [ %initial_min_idx, %entry ], [ %min.idx.next, %loop ]
+ %indvars.iv.next.i = add nsw i64 %indvars.iv.i, -1
+ %ptr.i32.iv = getelementptr i32, ptr %arr, i64 %indvars.iv.i
+ %ptr.first.load = getelementptr i8, ptr %ptr.i32.iv, i64 -8
+ %val.first = load i32, ptr %ptr.first.load, align 4
+ %min.idx.ext = sext i32 %min.idx to i64
+ %ptr.i32.min = getelementptr i32, ptr %arr, i64 %min.idx.ext
+ %ptr.second.load = getelementptr i8, ptr %ptr.i32.min, i64 -4
+ %val.current.min = load i32, ptr %ptr.second.load, align 4
+ %cmp = icmp slt i32 %val.first, %val.current.min
+ %next.idx.trunc = trunc nsw i64 %indvars.iv.next.i to i32
+ %min.idx.next = select i1 %cmp, i32 %next.idx.trunc, i32 %min.idx
+ %loop.counter.next = add nsw i64 %loop.counter, -1
+ %loop.continue = icmp samesign ugt i64 %loop.counter, 1
+ br i1 %loop.continue, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; Positive test: Minimum finding pattern with i64 loads.
+define void @test_gvn_min_pattern_i64(ptr %0, i32 %initial_min_idx) {
+; CHECK-LABEL: define void @test_gvn_min_pattern_i64(
+; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[INITIAL_MIN_IDX:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[HOIST_SEXT:%.*]] = sext i32 [[INITIAL_MIN_IDX]] to i64
+; CHECK-NEXT: [[HOIST_GEP1:%.*]] = getelementptr i64, ptr [[TMP0]], i64 [[HOIST_SEXT]]
+; CHECK-NEXT: [[HOIST_GEP2:%.*]] = getelementptr i8, ptr [[HOIST_GEP1]], i64 -4
+; CHECK-NEXT: [[HOISTED_LOAD:%.*]] = load i64, ptr [[HOIST_GEP2]], align 4
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[KNOWN_MIN:%.*]] = phi i64 [ [[HOISTED_LOAD]], %[[ENTRY]] ], [ [[CURRENT_MIN:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INDVARS_IV_I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT_I:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[LOOP_COUNTER:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[LOOP_COUNTER_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i32 [ [[INITIAL_MIN_IDX]], %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_I]] = add nsw i64 [[INDVARS_IV_I]], -1
+; CHECK-NEXT: [[PTR_I64_IV:%.*]] = getelementptr i64, ptr [[TMP0]], i64 [[INDVARS_IV_I]]
+; CHECK-NEXT: [[PTR_FIRST_LOAD:%.*]] = getelementptr i8, ptr [[PTR_I64_IV]], i64 -8
+; CHECK-NEXT: [[VAL_FIRST:%.*]] = load i64, ptr [[PTR_FIRST_LOAD]], align 4
+; CHECK-NEXT: [[MIN_IDX_EXT:%.*]] = sext i32 [[MIN_IDX]] to i64
+; CHECK-NEXT: [[PTR_I64_MIN:%.*]] = getelementptr i64, ptr [[TMP0]], i64 [[MIN_IDX_EXT]]
+; CHECK-NEXT: [[PTR_SECOND_LOAD:%.*]] = getelementptr i8, ptr [[PTR_I64_MIN]], i64 -4
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[VAL_FIRST]], [[KNOWN_MIN]]
+; CHECK-NEXT: [[NEXT_IDX_TRUNC:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT_I]] to i32
+; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i32 [[NEXT_IDX_TRUNC]], i32 [[MIN_IDX]]
+; CHECK-NEXT: [[LOOP_COUNTER_NEXT]] = add nsw i64 [[LOOP_COUNTER]], -1
+; CHECK-NEXT: [[LOOP_CONTINUE:%.*]] = icmp samesign ugt i64 [[LOOP_COUNTER]], 1
+; CHECK-NEXT: [[CURRENT_MIN]] = select i1 [[CMP]], i64 [[VAL_FIRST]], i64 [[KNOWN_MIN]]
+; CHECK-NEXT: br i1 [[LOOP_CONTINUE]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %indvars.iv.i = phi i64 [ 1, %entry ], [ %indvars.iv.next.i, %loop ]
+ %loop.counter = phi i64 [ 0, %entry ], [ %loop.counter.next, %loop ]
+ %min.idx = phi i32 [ %initial_min_idx, %entry ], [ %min.idx.next, %loop ]
+ %indvars.iv.next.i = add nsw i64 %indvars.iv.i, -1
+ %ptr.i64.iv = getelementptr i64, ptr %0, i64 %indvars.iv.i
+ %ptr.first.load = getelementptr i8, ptr %ptr.i64.iv, i64 -8
+ %val.first = load i64, ptr %ptr.first.load, align 4
+ %min.idx.ext = sext i32 %min.idx to i64
+ %ptr.i64.min = getelementptr i64, ptr %0, i64 %min.idx.ext
+ %ptr.second.load = getelementptr i8, ptr %ptr.i64.min, i64 -4
+ %val.current.min = load i64, ptr %ptr.second.load, align 4
+ %cmp = icmp slt i64 %val.first, %val.current.min
+ %next.idx.trunc = trunc nsw i64 %indvars.iv.next.i to i32
+ %min.idx.next = select i1 %cmp, i32 %next.idx.trunc, i32 %min.idx
+ %loop.counter.next = add nsw i64 %loop.counter, -1
+ %loop.continue = icmp samesign ugt i64 %loop.counter, 1
+ br i1 %loop.continue, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; Negative test: Select not in a loop.
+define void @test_not_in_loop(ptr %arr) {
+; CHECK-LABEL: define void @test_not_in_loop(
+; CHECK-SAME: ptr [[ARR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARR]], align 4
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[ARR]], i64 1
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[LOAD1]], float [[LOAD2]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %gep1 = getelementptr float, ptr %arr, i32 0
+ %load1 = load float, ptr %gep1
+ %sext = sext i32 1 to i64
+ %gep2 = getelementptr float, ptr %arr, i64 %sext
+ %gep3 = getelementptr i8, ptr %gep2, i64 0
+ %load2 = load float, ptr %gep3
+ %cmp = fcmp olt float %load1, %load2
+ %sel = select i1 %cmp, float %load1, float %load2
+ ret void
+}
+
+; Negative test: Loop without preheader (multiple entry points).
+define void @test_no_preheader(ptr %arr, i32 %n, i1 %cond) {
+; CHECK-LABEL: define void @test_no_preheader(
+; CHECK-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP:.*]], label %[[OTHER_ENTRY:.*]]
+; CHECK: [[OTHER_ENTRY]]:
+; CHECK-NEXT: br label %[[LOOP]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[OTHER_ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[ARR]], i32 [[I]]
+; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[ARR]], i64 [[SEXT]]
+; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT: [[EXIT:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXIT]], label %[[LOOP]], label %[[EXIT_BLOCK:.*]]
+; CHECK: [[EXIT_BLOCK]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %loop, label %other_entry
+
+other_entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ 0, %other_entry ], [ %i.next, %loop ]
+ %gep1 = getelementptr float, ptr %arr, i32 %i
+ %load1 = load float, ptr %gep1
+ %sext = sext i32 %i to i64
+ %gep2 = getelementptr float, ptr %arr, i64 %sext
+ %gep3 = getelementptr i8, ptr %gep2, i64 0
+ %load2 = load float, ptr %gep3
+ %cmp = fcmp olt float %load1, %load2
+ %sel = select i1 %cmp, float %load1, float %load2
+ %i.next = add i32 %i, 1
+ %exit = icmp slt i32 %i.next, %n
+ br i1 %exit, label %loop, label %exit_block
+
+exit_block:
+ ret void
+}
+
+; Negative test: Condition is not a CmpInst.
+define void @test_condition_not_cmp(ptr %arr, i32 %n, i1 %bool) {
+; CHECK-LABEL: define void @test_condition_not_cmp(
+; CHECK-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]], i1 [[BOOL:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[ARR]], i32 [[I]]
+; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[ARR]], i64 [[SEXT]]
+; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT: [[EXIT:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXIT]], label %[[LOOP]], label %[[EXIT_BLOCK:.*]]
+; CHECK: [[EXIT_BLOCK]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %gep1 = getelementptr float, ptr %arr, i32 %i
+ %load1 = load float, ptr %gep1
+ %sext = sext i32 %i to i64
+ %gep2 = getelementptr float, ptr %arr, i64 %sext
+ %gep3 = getelementptr i8, ptr %gep2, i64 0
+ %load2 = load float, ptr %gep3
+ %sel = select i1 %bool, float %load1, float %load2
+ %i.next = add i32 %i, 1
+ %exit = icmp slt i32 %i.next, %n
+ br i1 %exit, label %loop, label %exit_block
+
+exit_block:
+ ret void
+}
+
+; Negative test: Wrong comparison predicate (>= instead of <).
+define void @test_wrong_predicate(ptr %arr, i32 %n) {
+; CHECK-LABEL: define void @test_wrong_predicate(
+; CHECK-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[ARR]], i32 [[I]]
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[ARR]], i64 [[SEXT]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[LOAD1]], [[LOAD1]]
+; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT: [[EXIT:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXIT]], label %[[LOOP]], label %[[EXIT_BLOCK:.*]]
+; CHECK: [[EXIT_BLOCK]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %gep1 = getelementptr float, ptr %arr, i32 %i
+ %load1 = load float, ptr %gep1
+ %sext = sext i32 %i to i64
+ %gep2 = getelementptr float, ptr %arr, i64 %sext
+ %gep3 = getelementptr i8, ptr %gep2, i64 0
+ %load2 = load float, ptr %gep3
+ %cmp = fcmp oge float %load1, %load2
+ %sel = select i1 %cmp, float %load1, float %load2
+ %i.next = add i32 %i, 1
+ %exit = icmp slt i32 %i.next, %n
+ br i1 %exit, label %loop, label %exit_block
+
+exit_block:
+ ret void
+}
+
+; Negative test: Not both operands are loads (constant RHS).
+define void @test_not_both_loads(ptr %arr, i32 %n) {
+; CHECK-LABEL: define void @test_not_both_loads(
+; CHECK-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[ARR]], i32 [[I]]
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[LOAD1]], float 0.000000e+00
+; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT: [[EXIT:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXIT]], label %[[LOOP]], label %[[EXIT_BLOCK:.*]]
+; CHECK: [[EXIT_BLOCK]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %gep1 = getelementptr float, ptr %arr, i32 %i
+ %load1 = load float, ptr %gep1
+ %cmp = fcmp olt float %load1, 0.0
+ %sel = select i1 %cmp, float %load1, float 0.0
+ %i.next = add i32 %i, 1
+ %exit = icmp slt i32 %i.next, %n
+ br i1 %exit, label %loop, label %exit_block
+
+exit_block:
+ ret void
+}
+
+; Negative test: Load doesn't match the nested GEP(GEP(...)) pattern.
+define void @test_wrong_gep_pattern(ptr %arr, i32 %n) {
+; CHECK-LABEL: define void @test_wrong_gep_pattern(
+; CHECK-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[ARR]], i32 [[I]]
+; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT: [[EXIT:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXIT]], label %[[LOOP]], label %[[EXIT_BLOCK:.*]]
+; CHECK: [[EXIT_BLOCK]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %gep1 = getelementptr float, ptr %arr, i32 %i
+ %load1 = load float, ptr %gep1
+ ; Simple GEP, not nested GEP(GEP(...))
+ %gep2 = getelementptr float, ptr %arr, i32 %i
+ %load2 = load float, ptr %gep2
+ %cmp = fcmp olt float %load1, %load2
+ %sel = select i1 %cmp, float %load1, float %load2
+ %i.next = add i32 %i, 1
+ %exit = icmp slt i32 %i.next, %n
+ br i1 %exit, label %loop, label %exit_block
+
+exit_block:
+ ret void
+}
+
+; Negative test: ZExt instead of SExt.
+define void @test_no_sext(ptr %arr, i32 %n) {
+; CHECK-LABEL: define void @test_no_sext(
+; CHECK-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[ARR]], i32 [[I]]
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[I]] to i64
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[ARR]], i64 [[ZEXT]]
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[LOAD1]], float [[LOAD2]]
+; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT: [[EXIT:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXIT]], label %[[LOOP]], label %[[EXIT_BLOCK:.*]]
+; CHECK: [[EXIT_BLOCK]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %gep1 = getelementptr float, ptr %arr, i32 %i
+ %load1 = load float, ptr %gep1
+ %zext = zext i32 %i to i64
+ %gep2 = getelementptr float, ptr %arr, i64 %zext
+ %gep3 = getelementptr i8, ptr %gep2, i64 0
+ %load2 = load float, ptr %gep3
+ %cmp = fcmp olt float %load1, %load2
+ %sel = select i1 %cmp, float %load1, float %load2
+ %i.next = add i32 %i, 1
+ %exit = icmp slt i32 %i.next, %n
+ br i1 %exit, label %loop, label %exit_block
+
+exit_block:
+ ret void
+}
+
+; Negative test: the index is sign-extended i16->i32 (from a trunc of %i) instead of i32->i64; expected output keeps the loads and select in the loop.
+define void @test_wrong_sext_types(ptr %arr, i32 %n) {
+; CHECK-LABEL: define void @test_wrong_sext_types(
+; CHECK-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[ARR]], i32 [[I]]
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[I16:%.*]] = trunc i32 [[I]] to i16
+; CHECK-NEXT: [[SEXT:%.*]] = sext i16 [[I16]] to i32
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[ARR]], i32 [[SEXT]]
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[LOAD1]], float [[LOAD2]]
+; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT: [[EXIT:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXIT]], label %[[LOOP]], label %[[EXIT_BLOCK:.*]]
+; CHECK: [[EXIT_BLOCK]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %gep1 = getelementptr float, ptr %arr, i32 %i
+ %load1 = load float, ptr %gep1
+ %i16 = trunc i32 %i to i16
+ %sext = sext i16 %i16 to i32
+ %gep2 = getelementptr float, ptr %arr, i32 %sext
+ %gep3 = getelementptr i8, ptr %gep2, i32 0
+ %load2 = load float, ptr %gep3
+ %cmp = fcmp olt float %load1, %load2
+ %sel = select i1 %cmp, float %load1, float %load2
+ %i.next = add i32 %i, 1
+ %exit = icmp slt i32 %i.next, %n
+ br i1 %exit, label %loop, label %exit_block
+
+exit_block:
+ ret void
+}
+
+; Negative test: IndexVal is the constant 5, not a PHI node; expected output has that load in the entry block and keeps the fcmp/select in the loop.
+define void @test_index_is_constant(ptr %arr, i32 %n) {
+; CHECK-LABEL: define void @test_index_is_constant(
+; CHECK-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[ARR]], i64 5
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[ARR]], i32 [[I]]
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[LOAD1]], float [[LOAD2]]
+; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT: [[EXIT:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXIT]], label %[[LOOP]], label %[[EXIT_BLOCK:.*]]
+; CHECK: [[EXIT_BLOCK]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+
+ %gep1 = getelementptr float, ptr %arr, i32 %i
+ %load1 = load float, ptr %gep1
+ ; The sext operand is the literal 5 rather than a loop PHI; it appears as the plain i64 5 index in the expected output.
+ %sext = sext i32 5 to i64
+ %gep2 = getelementptr float, ptr %arr, i64 %sext
+ %gep3 = getelementptr i8, ptr %gep2, i64 0
+ %load2 = load float, ptr %gep3
+ %cmp = fcmp olt float %load1, %load2
+ %sel = select i1 %cmp, float %load1, float %load2
+ %i.next = add i32 %i, 1
+ %exit = icmp slt i32 %i.next, %n
+ br i1 %exit, label %loop, label %exit_block
+
+exit_block:
+ ret void
+}
+
+declare void @external_function(ptr) ; Body-less callee with no attributes; called from @test_may_alias_call_in_loop.
+
+; Negative test: the min-index PHI %min.idx starts at 0 instead of 1; expected output keeps both loads, the fcmp and both selects in the loop.
+define void @test_initial_index_not_one(ptr %arr, i32 %n) {
+; CHECK-LABEL: define void @test_initial_index_not_one(
+; CHECK-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[ARR]], i32 [[I]]
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[MIN_IDX]] to i64
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[ARR]], i64 [[SEXT]]
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[LOAD1]], float [[LOAD2]]
+; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i32 [[I]], i32 [[MIN_IDX]]
+; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT: [[EXIT:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXIT]], label %[[LOOP]], label %[[EXIT_BLOCK:.*]]
+; CHECK: [[EXIT_BLOCK]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ ; Entry value of %min.idx is 0 here, whereas the later tests in this file start it at 1.
+ %min.idx = phi i32 [ 0, %entry ], [ %min.idx.next, %loop ]
+
+ %gep1 = getelementptr float, ptr %arr, i32 %i
+ %load1 = load float, ptr %gep1
+ %sext = sext i32 %min.idx to i64
+ %gep2 = getelementptr float, ptr %arr, i64 %sext
+ %gep3 = getelementptr i8, ptr %gep2, i64 0
+ %load2 = load float, ptr %gep3
+ %cmp = fcmp olt float %load1, %load2
+ %sel = select i1 %cmp, float %load1, float %load2
+ %min.idx.next = select i1 %cmp, i32 %i, i32 %min.idx
+ %i.next = add i32 %i, 1
+ %exit = icmp slt i32 %i.next, %n
+ br i1 %exit, label %loop, label %exit_block
+
+exit_block:
+ ret void
+}
+
+; Negative test: pattern recognized, but a store to arr[%i] inside the loop may alias the loads, so hoisting is not safe.
+define void @test_aliasing_store_in_loop(ptr %arr, i32 %n) {
+; CHECK-LABEL: define void @test_aliasing_store_in_loop(
+; CHECK-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[STORE_GEP:%.*]] = getelementptr float, ptr [[ARR]], i32 [[I]]
+; CHECK-NEXT: store float 0.000000e+00, ptr [[STORE_GEP]], align 4
+; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[MIN_IDX]] to i64
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[ARR]], i64 [[SEXT]]
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float 0.000000e+00, [[LOAD2]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float 0.000000e+00, float [[LOAD2]]
+; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i32 [[I]], i32 [[MIN_IDX]]
+; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT: [[EXIT:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXIT]], label %[[LOOP]], label %[[EXIT_BLOCK:.*]]
+; CHECK: [[EXIT_BLOCK]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %min.idx = phi i32 [ 1, %entry ], [ %min.idx.next, %loop ]
+
+ ; Writes 0.0 to arr[%i] right before %load1 reads the same address; the expected output uses that 0.0 in place of %load1.
+ %store.gep = getelementptr float, ptr %arr, i32 %i
+ store float 0.0, ptr %store.gep
+
+ %gep1 = getelementptr float, ptr %arr, i32 %i
+ %load1 = load float, ptr %gep1
+ %sext = sext i32 %min.idx to i64
+ %gep2 = getelementptr float, ptr %arr, i64 %sext
+ %gep3 = getelementptr i8, ptr %gep2, i64 0
+ %load2 = load float, ptr %gep3
+ %cmp = fcmp olt float %load1, %load2
+ %sel = select i1 %cmp, float %load1, float %load2
+ %min.idx.next = select i1 %cmp, i32 %i, i32 %min.idx
+ %i.next = add i32 %i, 1
+ %exit = icmp slt i32 %i.next, %n
+ br i1 %exit, label %loop, label %exit_block
+
+exit_block:
+ ret void
+}
+
+; Negative test: pattern recognized, but both GEPs use %base.ptr, a PHI advanced by one float per iteration, so BasePtr is not loop invariant.
+define void @test_non_invariant_base_ptr(ptr %arr, i32 %n) {
+; CHECK-LABEL: define void @test_non_invariant_base_ptr(
+; CHECK-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[BASE_PTR:%.*]] = phi ptr [ [[ARR]], %[[ENTRY]] ], [ [[BASE_PTR_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[BASE_PTR]], i32 [[I]]
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[MIN_IDX]] to i64
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[BASE_PTR]], i64 [[SEXT]]
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[LOAD1]], float [[LOAD2]]
+; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i32 [[I]], i32 [[MIN_IDX]]
+; CHECK-NEXT: [[BASE_PTR_NEXT]] = getelementptr float, ptr [[BASE_PTR]], i32 1
+; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT: [[EXIT:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXIT]], label %[[LOOP]], label %[[EXIT_BLOCK:.*]]
+; CHECK: [[EXIT_BLOCK]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %min.idx = phi i32 [ 1, %entry ], [ %min.idx.next, %loop ]
+ ; %base.ptr starts at %arr and takes %base.ptr.next on every back edge, so it is not loop invariant.
+ %base.ptr = phi ptr [ %arr, %entry ], [ %base.ptr.next, %loop ]
+
+ %gep1 = getelementptr float, ptr %base.ptr, i32 %i
+ %load1 = load float, ptr %gep1
+ %sext = sext i32 %min.idx to i64
+ %gep2 = getelementptr float, ptr %base.ptr, i64 %sext
+ %gep3 = getelementptr i8, ptr %gep2, i64 0
+ %load2 = load float, ptr %gep3
+ %cmp = fcmp olt float %load1, %load2
+ %sel = select i1 %cmp, float %load1, float %load2
+ %min.idx.next = select i1 %cmp, i32 %i, i32 %min.idx
+ %base.ptr.next = getelementptr float, ptr %base.ptr, i32 1
+ %i.next = add i32 %i, 1
+ %exit = icmp slt i32 %i.next, %n
+ br i1 %exit, label %loop, label %exit_block
+
+exit_block:
+ ret void
+}
+
+; Negative test: pattern recognized, but the loop calls @external_function on %arr before the loads, and that call may alias them.
+define void @test_may_alias_call_in_loop(ptr %arr, i32 %n) {
+; CHECK-LABEL: define void @test_may_alias_call_in_loop(
+; CHECK-SAME: ptr [[ARR:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: call void @external_function(ptr [[ARR]])
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[ARR]], i32 [[I]]
+; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[MIN_IDX]] to i64
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[ARR]], i64 [[SEXT]]
+; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[LOAD1]], float [[LOAD2]]
+; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i32 [[I]], i32 [[MIN_IDX]]
+; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT: [[EXIT:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXIT]], label %[[LOOP]], label %[[EXIT_BLOCK:.*]]
+; CHECK: [[EXIT_BLOCK]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %min.idx = phi i32 [ 1, %entry ], [ %min.idx.next, %loop ]
+
+ ; @external_function is only declared (no body, no memory attributes), so it may modify memory reachable from %arr.
+ call void @external_function(ptr %arr)
+
+ %gep1 = getelementptr float, ptr %arr, i32 %i
+ %load1 = load float, ptr %gep1
+ %sext = sext i32 %min.idx to i64
+ %gep2 = getelementptr float, ptr %arr, i64 %sext
+ %gep3 = getelementptr i8, ptr %gep2, i64 0
+ %load2 = load float, ptr %gep3
+ %cmp = fcmp olt float %load1, %load2
+ %sel = select i1 %cmp, float %load1, float %load2
+ %min.idx.next = select i1 %cmp, i32 %i, i32 %min.idx
+ %i.next = add i32 %i, 1
+ %exit = icmp slt i32 %i.next, %n
+ br i1 %exit, label %loop, label %exit_block
+
+exit_block:
+ ret void
+}
More information about the llvm-commits
mailing list