[llvm] [InstCombine] Allow folding cross-lane operations into PHIs/selects (PR #164388)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 21 03:34:25 PDT 2025
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/164388
Previously, cross-lane operations were disallowed here, but they are only problematic if the `select` condition is a vector, as the input of the operation is not simply one of the arms of the phi/select.
>From 5046a8e46d83834627f90f3f515e7dd9e241fa45 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 20 Oct 2025 16:05:42 +0000
Subject: [PATCH] [InstCombine] Allow folding cross-lane operations into
PHIs/selects
Previously, cross-lane operations were disallowed here, but they are
only problematic if the `select` condition is a vector, as the input of
the operation is not simply one of the arms of the phi/select.
---
.../InstCombine/InstCombineCalls.cpp | 22 +++++++++++-----
.../InstCombine/InstCombineInternal.h | 3 ++-
.../InstCombine/InstructionCombining.cpp | 6 ++++-
.../InstCombine/intrinsic-select.ll | 3 +--
llvm/test/Transforms/InstCombine/phi.ll | 25 +++++++++++++++++++
.../Transforms/InstCombine/select_frexp.ll | 8 +++---
.../always-inline-alloca-promotion.ll | 5 ++--
7 files changed, 55 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index dab200d86d4a0..4a9128a3384f0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -4003,18 +4003,28 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Try to fold intrinsic into select/phi operands. This is legal if:
// * The intrinsic is speculatable.
- // * The select condition is not a vector, or the intrinsic does not
- // perform cross-lane operations.
- if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI) &&
- isNotCrossLaneOperation(II))
+ // * The operand is one of the following:
+ // - a phi.
+ // - a select with a scalar condition.
+ // - a select with a vector condition and II is not a cross lane operation.
+ if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI)) {
for (Value *Op : II->args()) {
- if (auto *Sel = dyn_cast<SelectInst>(Op))
- if (Instruction *R = FoldOpIntoSelect(*II, Sel))
+ if (auto *Sel = dyn_cast<SelectInst>(Op)) {
+ bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
+ if (IsVectorCond && !isNotCrossLaneOperation(II))
+ continue;
+ // Don't replace a scalar select with a more expensive vector select if
+ // we can't simplify both arms of the select.
+ bool SimplifyBothArms = !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
+ if (Instruction *R = FoldOpIntoSelect(
+ *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
return R;
+ }
if (auto *Phi = dyn_cast<PHINode>(Op))
if (Instruction *R = foldOpIntoPhi(*II, Phi))
return R;
}
+ }
if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
return Shuf;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 943c223e516fb..ede73f86f1944 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -664,7 +664,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
/// This also works for Cast instructions, which obviously do not have a
/// second operand.
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
- bool FoldWithMultiUse = false);
+ bool FoldWithMultiUse = false,
+ bool SimplifyBothArms = false);
/// This is a convenience wrapper function for the above two functions.
Instruction *foldBinOpIntoSelectOrPhi(BinaryOperator &I);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 3f11cae143b81..c52d030efd229 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1777,7 +1777,8 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
}
Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
- bool FoldWithMultiUse) {
+ bool FoldWithMultiUse,
+ bool SimplifyBothArms) {
// Don't modify shared select instructions unless set FoldWithMultiUse
if (!SI->hasOneUse() && !FoldWithMultiUse)
return nullptr;
@@ -1821,6 +1822,9 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
if (!NewTV && !NewFV)
return nullptr;
+ if (SimplifyBothArms)
+ return nullptr;
+
// Create an instruction for the arm that did not fold.
if (!NewTV)
NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
diff --git a/llvm/test/Transforms/InstCombine/intrinsic-select.ll b/llvm/test/Transforms/InstCombine/intrinsic-select.ll
index 2f1f9fc2b6f9e..18f068856951e 100644
--- a/llvm/test/Transforms/InstCombine/intrinsic-select.ll
+++ b/llvm/test/Transforms/InstCombine/intrinsic-select.ll
@@ -222,8 +222,7 @@ declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
define i32 @vec_to_scalar_select_scalar(i1 %b) {
; CHECK-LABEL: @vec_to_scalar_select_scalar(
-; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4>
-; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[S]])
+; CHECK-NEXT: [[C:%.*]] = select i1 [[B:%.*]], i32 3, i32 7
; CHECK-NEXT: ret i32 [[C]]
;
%s = select i1 %b, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4>
diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll
index 3454835d3ad65..c8a1cf6efb742 100644
--- a/llvm/test/Transforms/InstCombine/phi.ll
+++ b/llvm/test/Transforms/InstCombine/phi.ll
@@ -3026,6 +3026,31 @@ join:
ret i32 %umax
}
+define i32 @cross_lane_intrinsic_over_phi(i1 %c, i1 %c2, <4 x i32> %a) {
+; CHECK-LABEL: @cross_lane_intrinsic_over_phi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK: if:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A:%.*]])
+; CHECK-NEXT: br label [[JOIN]]
+; CHECK: join:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: call void @may_exit()
+; CHECK-NEXT: ret i32 [[PHI]]
+;
+entry:
+ br i1 %c, label %if, label %join
+
+if:
+ br label %join
+
+join:
+ %phi = phi <4 x i32> [ %a, %if ], [ zeroinitializer, %entry ]
+ call void @may_exit()
+ %sum = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %phi)
+ ret i32 %sum
+}
+
define i32 @multiple_intrinsics_with_multiple_phi_uses(i1 %c, i32 %arg) {
; CHECK-LABEL: @multiple_intrinsics_with_multiple_phi_uses(
; CHECK-NEXT: entry:
diff --git a/llvm/test/Transforms/InstCombine/select_frexp.ll b/llvm/test/Transforms/InstCombine/select_frexp.ll
index d025aedda7170..ccfcca1fe7cb5 100644
--- a/llvm/test/Transforms/InstCombine/select_frexp.ll
+++ b/llvm/test/Transforms/InstCombine/select_frexp.ll
@@ -115,10 +115,10 @@ define float @test_select_frexp_no_const(float %x, float %y, i1 %cond) {
define i32 @test_select_frexp_extract_exp(float %x, i1 %cond) {
; CHECK-LABEL: define i32 @test_select_frexp_extract_exp(
; CHECK-SAME: float [[X:%.*]], i1 [[COND:%.*]]) {
-; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], float 1.000000e+00, float [[X]]
-; CHECK-NEXT: [[FREXP:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SEL]])
+; CHECK-NEXT: [[FREXP:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
; CHECK-NEXT: [[FREXP_1:%.*]] = extractvalue { float, i32 } [[FREXP]], 1
-; CHECK-NEXT: ret i32 [[FREXP_1]]
+; CHECK-NEXT: [[FREXP_2:%.*]] = select i1 [[COND]], i32 1, i32 [[FREXP_1]]
+; CHECK-NEXT: ret i32 [[FREXP_2]]
;
%sel = select i1 %cond, float 1.000000e+00, float %x
%frexp = call { float, i32 } @llvm.frexp.f32.i32(float %sel)
@@ -132,7 +132,7 @@ define float @test_select_frexp_fast_math_select(float %x, i1 %cond) {
; CHECK-SAME: float [[X:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: [[FREXP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
; CHECK-NEXT: [[MANTISSA:%.*]] = extractvalue { float, i32 } [[FREXP1]], 0
-; CHECK-NEXT: [[SELECT_FREXP:%.*]] = select nnan ninf nsz i1 [[COND]], float 5.000000e-01, float [[MANTISSA]]
+; CHECK-NEXT: [[SELECT_FREXP:%.*]] = select i1 [[COND]], float 5.000000e-01, float [[MANTISSA]]
; CHECK-NEXT: ret float [[SELECT_FREXP]]
;
%sel = select nnan ninf nsz i1 %cond, float 1.000000e+00, float %x
diff --git a/llvm/test/Transforms/PhaseOrdering/always-inline-alloca-promotion.ll b/llvm/test/Transforms/PhaseOrdering/always-inline-alloca-promotion.ll
index 63279b04bda28..92ea2c608fe53 100644
--- a/llvm/test/Transforms/PhaseOrdering/always-inline-alloca-promotion.ll
+++ b/llvm/test/Transforms/PhaseOrdering/always-inline-alloca-promotion.ll
@@ -35,9 +35,8 @@ define void @ham() #1 {
; CHECK-NEXT: [[SNORK_EXIT:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr inttoptr (i64 48 to ptr), align 16
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[TMP0]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i64 0)
-; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP1]], <vscale x 16 x float> [[TMP2]], <vscale x 16 x float> undef
-; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[SPEC_SELECT]], i64 0)
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> undef, i64 0)
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], <vscale x 4 x float> zeroinitializer, <vscale x 4 x float> [[TMP2]]
; CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i32 0, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> zeroinitializer, <vscale x 4 x float> [[TMP3]])
; CHECK-NEXT: ret void
;
More information about the llvm-commits
mailing list