[llvm] [LV] Prevent `extract-lane` from generating unused IR with a single vector operand. (PR #172798)
Elvis Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 22 22:42:13 PST 2025
https://github.com/ElvisWang123 updated https://github.com/llvm/llvm-project/pull/172798
>From f727483caba07efe6c6034570e19a13ad17acb9e Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 17 Dec 2025 21:55:19 -0800
Subject: [PATCH 1/2] [LV] Prevent `extract-lane` from generating unused IR
when it has a single vector operand.
When `extract-lane` contains a single vector operand, we do not need to
generate the instructions for calculating the offset of the second vector
operand.
This patch makes `extract-lane` generate a simple `extractelement` when it
contains only a single vector operand.
---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 24 ++++++++++---------
.../widen-gep-all-indices-invariant.ll | 3 ---
.../LoopVectorize/RISCV/dead-ops-cost.ll | 3 ---
.../Transforms/LoopVectorize/RISCV/divrem.ll | 3 ---
.../first-order-recurrence-scalable-vf1.ll | 3 ---
.../LoopVectorize/RISCV/pointer-induction.ll | 3 ---
.../LoopVectorize/RISCV/scalable-tailfold.ll | 3 ---
.../tail-folding-fixed-order-recurrence.ll | 3 ---
.../LoopVectorize/RISCV/uniform-load-store.ll | 6 -----
9 files changed, 13 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ddbf014c17d4f..5127707abd3ac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -843,25 +843,27 @@ Value *VPInstruction::generate(VPTransformState &State) {
case VPInstruction::ExtractLane: {
Value *LaneToExtract = State.get(getOperand(0), true);
Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0));
- Value *Res = nullptr;
+ Value *Ext = State.VF.isScalar()
+ ? State.get(getOperand(1))
+ : Builder.CreateExtractElement(State.get(getOperand(1)),
+ LaneToExtract);
+
+ if (getNumOperands() == 2)
+ return Ext;
+
+ Value *Res = Ext;
Value *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
- for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {
+ for (unsigned Idx = 2; Idx != getNumOperands(); ++Idx) {
Value *VectorStart =
Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
- Value *VectorIdx = Idx == 1
- ? LaneToExtract
- : Builder.CreateSub(LaneToExtract, VectorStart);
+ Value *VectorIdx = Builder.CreateSub(LaneToExtract, VectorStart);
Value *Ext = State.VF.isScalar()
? State.get(getOperand(Idx))
: Builder.CreateExtractElement(
State.get(getOperand(Idx)), VectorIdx);
- if (Res) {
- Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
- Res = Builder.CreateSelect(Cmp, Ext, Res);
- } else {
- Res = Ext;
- }
+ Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
+ Res = Builder.CreateSelect(Cmp, Ext, Res);
}
return Res;
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll
index 97cc6929e44d5..65c6fec32a599 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll
@@ -35,9 +35,6 @@ define i32 @gep_with_all_invariant_operands(ptr %src.0, ptr %src.1, i64 %n, i1 %
; CHECK-NEXT: [[TMP12:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], splat (i1 true)
; CHECK-NEXT: [[FIRST_INACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> [[TMP12]], i1 false)
; CHECK-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[FIRST_INACTIVE_LANE]], 1
-; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4
-; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], i64 [[LAST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index 9daf4236982bd..3bc34e6553df1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -102,9 +102,6 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) {
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 16
-; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP13]], 0
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 16 x i8> [[WIDE_MASKED_GATHER]], i64 [[TMP11]]
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
index e4ba6fe9d757d..d9adc699399fa 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
@@ -673,9 +673,6 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) {
; CHECK: middle.block:
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: [[TMP17:%.*]] = sub i64 [[TMP16]], 1
-; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 8
-; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 0
; CHECK-NEXT: [[MERGE_LCSSA:%.*]] = extractelement <vscale x 8 x i32> [[PREDPHI]], i64 [[TMP17]]
; CHECK-NEXT: br label [[LOOP_LATCH:%.*]]
; CHECK: exit:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
index e35db479dc963..ee32d450606e0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
@@ -37,9 +37,6 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP15:%.*]] = sub i64 [[TMP12]], 1
; CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP15]], 1
-; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 2
-; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 0
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 2 x i64> [[VP_OP_LOAD]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i32 [[TMP21]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
index 164a5cd1ae3c0..1cbfe8a9e3b30 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
@@ -115,9 +115,6 @@ define i1 @scalarize_ptr_induction(ptr %start, ptr %end, ptr noalias %dst, i1 %c
; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP29:%.*]] = sub i64 [[TMP26]], 1
-; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 2
-; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 0
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <vscale x 2 x i1> [[TMP17]], i64 [[TMP29]]
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
index 549222cd919da..fa39b345310f0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
@@ -243,9 +243,6 @@ define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 0
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i64> [[BROADCAST_SPLAT]], i64 [[TMP8]]
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
; CHECK: for.end:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
index b95691f6e7c04..f332487986c23 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
@@ -431,9 +431,6 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: [[TMP28:%.*]] = sub i64 [[TMP13]], 1
; IF-EVL-NEXT: [[TMP17:%.*]] = sub i64 [[TMP28]], 1
-; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4
-; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 0
; IF-EVL-NEXT: [[TMP21:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i64 [[TMP17]]
; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; IF-EVL-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP14]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 55e7018c49eec..5d65159f6e86c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -129,9 +129,6 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap
; SCALABLE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; SCALABLE: [[MIDDLE_BLOCK]]:
; SCALABLE-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[TMP5]], 1
-; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP7]], 2
-; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP11]], 0
; SCALABLE-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i64> [[BROADCAST_SPLAT]], i64 [[LAST_ACTIVE_LANE]]
; SCALABLE-NEXT: br label %[[FOR_END:.*]]
; SCALABLE: [[FOR_END]]:
@@ -193,9 +190,6 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap
; TF-SCALABLE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; TF-SCALABLE: [[MIDDLE_BLOCK]]:
; TF-SCALABLE-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[TMP5]], 1
-; TF-SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
-; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 0
; TF-SCALABLE-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i64> [[BROADCAST_SPLAT]], i64 [[LAST_ACTIVE_LANE]]
; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
; TF-SCALABLE: [[FOR_END]]:
>From 70395cfcfb938796303805033c4729d1fdf5ad20 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Mon, 22 Dec 2025 22:40:02 -0800
Subject: [PATCH 2/2] Address comments.
---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 26 +++++++++++--------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 5127707abd3ac..7e64c6a781950 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -843,27 +843,31 @@ Value *VPInstruction::generate(VPTransformState &State) {
case VPInstruction::ExtractLane: {
Value *LaneToExtract = State.get(getOperand(0), true);
Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0));
- Value *Ext = State.VF.isScalar()
- ? State.get(getOperand(1))
- : Builder.CreateExtractElement(State.get(getOperand(1)),
- LaneToExtract);
+ Value *Res = nullptr;
+ // Just create an extractelement when extracting from a single vector.
if (getNumOperands() == 2)
- return Ext;
+ return State.VF.isScalar() ? State.get(getOperand(1))
+ : Builder.CreateExtractElement(
+ State.get(getOperand(1)), LaneToExtract);
- Value *Res = Ext;
Value *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
-
- for (unsigned Idx = 2; Idx != getNumOperands(); ++Idx) {
+ for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {
Value *VectorStart =
Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
- Value *VectorIdx = Builder.CreateSub(LaneToExtract, VectorStart);
+ Value *VectorIdx = Idx == 1
+ ? LaneToExtract
+ : Builder.CreateSub(LaneToExtract, VectorStart);
Value *Ext = State.VF.isScalar()
? State.get(getOperand(Idx))
: Builder.CreateExtractElement(
State.get(getOperand(Idx)), VectorIdx);
- Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
- Res = Builder.CreateSelect(Cmp, Ext, Res);
+ if (Res) {
+ Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
+ Res = Builder.CreateSelect(Cmp, Ext, Res);
+ } else {
+ Res = Ext;
+ }
}
return Res;
}
More information about the llvm-commits
mailing list