[llvm] cd2caf6 - [LV] Simplify extract-lane with scalar operand to the scalar value itself. (#174534)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 11 18:03:48 PST 2026
Author: Elvis Wang
Date: 2026-01-12T10:03:44+08:00
New Revision: cd2caf658071157b35471f54a4afe2aa8f86b473
URL: https://github.com/llvm/llvm-project/commit/cd2caf658071157b35471f54a4afe2aa8f86b473
DIFF: https://github.com/llvm/llvm-project/commit/cd2caf658071157b35471f54a4afe2aa8f86b473.diff
LOG: [LV] Simplify extract-lane with scalar operand to the scalar value itself. (#174534)
This patch simplifies extract-lane(%lane_num, %X) to %X when %X is a
single-scalar value. Extracting a lane from a single scalar is redundant,
since there is only one value to extract.
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/test/Transforms/LoopVectorize/AArch64/early_exit_costs.ll
llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
llvm/test/Transforms/LoopVectorize/pr43166-fold-tail-by-masking.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 83fe45bfd0bbf..4ee5214ca05ab 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1150,6 +1150,11 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// underlying ingredient.
bool doesGeneratePerAllLanes() const;
+ /// Return the number of operands determined by the opcode of the
+ /// VPInstruction. Returns -1u if the number of operands cannot be determined
+ /// directly by the opcode.
+ static unsigned getNumOperandsForOpcode(unsigned Opcode);
+
private:
typedef unsigned char OpcodeTy;
OpcodeTy Opcode;
@@ -1166,13 +1171,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// existing value is returned rather than a generated one.
Value *generate(VPTransformState &State);
-#if !defined(NDEBUG)
- /// Return the number of operands determined by the opcode of the
- /// VPInstruction. Returns -1u if the number of operands cannot be determined
- /// directly by the opcode.
- static unsigned getNumOperandsForOpcode(unsigned Opcode);
-#endif
-
public:
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 3732d009b9537..e22a90426b599 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -291,7 +291,8 @@ struct Recipe_match {
if (R->getNumOperands() != std::tuple_size_v<Ops_t>) {
[[maybe_unused]] auto *RepR = dyn_cast<VPReplicateRecipe>(R);
- assert((Opcode == Instruction::PHI ||
+ assert(((isa<VPInstruction>(R) &&
+ VPInstruction::getNumOperandsForOpcode(Opcode) == -1u) ||
(RepR && std::tuple_size_v<Ops_t> ==
RepR->getNumOperands() - RepR->isPredicated())) &&
"non-variadic recipe with matched opcode does not have the "
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2c0772320c3cf..5716a6f82259e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -453,7 +453,6 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::BranchOnCount:
case VPInstruction::BranchOnTwoConds:
case VPInstruction::ComputeReductionResult:
- case VPInstruction::ExtractLane:
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::LogicalAnd:
case VPInstruction::PtrAdd:
@@ -476,6 +475,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::LastActiveLane:
case VPInstruction::SLPLoad:
case VPInstruction::SLPStore:
+ case VPInstruction::ExtractLane:
// Cannot determine the number of operands from the opcode.
return -1u;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a430f13f0c9c0..afa74937fc681 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1518,6 +1518,14 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
if (!Plan->isUnrolled())
return;
+ // Simplify extract-lane(%lane_num, %scalar_val) -> %scalar_val.
+ // After unrolling, extract-lane may be used to extract values from multiple
+ // scalar sources. Only simplify when extracting from a single scalar source.
+ if (match(Def, m_ExtractLane(m_VPValue(), m_VPValue(A))) &&
+ vputils::isSingleScalar(A)) {
+ return Def->replaceAllUsesWith(A);
+ }
+
// Hoist an invariant increment Y of a phi X, by having X start at Y.
if (match(Def, m_c_Add(m_VPValue(X), m_VPValue(Y))) && isa<VPIRValue>(Y) &&
isa<VPPhi>(X)) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/early_exit_costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/early_exit_costs.ll
index 77b584655187c..3f9a17591a717 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/early_exit_costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/early_exit_costs.ll
@@ -94,8 +94,6 @@ define i64 @vectorization_not_profitable_due_to_trunc(ptr dereferenceable(800) %
; CHECK-LABEL: LV: Checking a loop in 'vectorization_not_profitable_due_to_trunc'
; CHECK: LV: Selecting VF: 1.
; CHECK-NEXT: Calculating cost of work in exit block vector.early.exit:
-; CHECK-NEXT: Cost of 1 for VF 1: EMIT vp<%first.active.lane> = first-active-lane ir<%t>
-; CHECK-NEXT: Cost of 0 for VF 1: EMIT vp<%early.exit.value> = extract-lane vp<%first.active.lane>, ir<%l>
; CHECK: LV: Vectorization is possible but not beneficial.
entry:
br label %loop.header
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
index 549222cd919da..3f999a5cc8fc5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
@@ -242,14 +242,9 @@ define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i64> [[BROADCAST_SPLAT]], i64 [[TMP8]]
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
; CHECK: for.end:
-; CHECK-NEXT: ret i64 [[TMP12]]
+; CHECK-NEXT: ret i64 [[V]]
;
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 16a20c86e38f0..adda7c362b8e8 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -128,14 +128,9 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap
; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; SCALABLE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; SCALABLE: [[MIDDLE_BLOCK]]:
-; SCALABLE-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[TMP5]], 1
-; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP7]], 2
-; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP11]], 0
-; SCALABLE-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i64> [[BROADCAST_SPLAT]], i64 [[LAST_ACTIVE_LANE]]
; SCALABLE-NEXT: br label %[[FOR_END:.*]]
; SCALABLE: [[FOR_END]]:
-; SCALABLE-NEXT: ret i64 [[TMP12]]
+; SCALABLE-NEXT: ret i64 [[TMP6]]
;
; FIXEDLEN-LABEL: define i64 @uniform_load_outside_use(
; FIXEDLEN-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
@@ -192,14 +187,9 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap
; TF-SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; TF-SCALABLE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
-; TF-SCALABLE-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[TMP5]], 1
-; TF-SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
-; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 0
-; TF-SCALABLE-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i64> [[BROADCAST_SPLAT]], i64 [[LAST_ACTIVE_LANE]]
; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
; TF-SCALABLE: [[FOR_END]]:
-; TF-SCALABLE-NEXT: ret i64 [[TMP12]]
+; TF-SCALABLE-NEXT: ret i64 [[V]]
;
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/pr43166-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr43166-fold-tail-by-masking.ll
index 70a808f9b336e..e2ed2abcc81f8 100644
--- a/llvm/test/Transforms/LoopVectorize/pr43166-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr43166-fold-tail-by-masking.ll
@@ -46,14 +46,12 @@ define i64 @test1(i64 %y) {
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i64> splat (i64 3), [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
-; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP2]], <4 x i64> splat (i64 77), <4 x i64> [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], i64 77, i64 [[TMP3]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> <i1 false, i1 false, i1 false, i1 true>, i1 false)
-; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[PREDPHI]], i64 [[TMP4]]
; CHECK-NEXT: br label [[COND_END:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret i64 [[TMP5]]
@@ -89,14 +87,11 @@ define i64 @test2(i64 %y) {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[Y:%.*]], 0
-; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP1]], <4 x i64> splat (i64 77), <4 x i64> splat (i64 55)
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], i64 77, i64 55
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> <i1 false, i1 false, i1 false, i1 true>, i1 false)
-; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[PREDPHI]], i64 [[TMP3]]
; CHECK-NEXT: br label [[COND_END:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret i64 [[TMP4]]
More information about the llvm-commits mailing list.