[llvm] [InstCombine] Pull vector reverse through intrinsics (PR #146384)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 1 03:28:39 PDT 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/146384
>From 1ae4ddc06ca571c0630016e117eae4e00ae88e6f Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 30 Jun 2025 14:40:23 +0100
Subject: [PATCH 1/4] Precommit tests
---
.../Transforms/InstCombine/vector-reverse.ll | 145 ++++++++++++++++++
1 file changed, 145 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll
index c9c68d2241b34..d1d8c259e7c8e 100644
--- a/llvm/test/Transforms/InstCombine/vector-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll
@@ -17,6 +17,19 @@ define <vscale x 4 x i32> @binop_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i3
ret <vscale x 4 x i32> %add
}
+define <vscale x 4 x i32> @binop_intrinsic_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @binop_intrinsic_reverse(
+; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
+; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[A_REV]], <vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
+;
+ %a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+ %b.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
+ %smax = call <vscale x 4 x i32> @llvm.smax(<vscale x 4 x i32> %a.rev, <vscale x 4 x i32> %b.rev)
+ ret <vscale x 4 x i32> %smax
+}
+
; %a.rev has multiple uses
define <vscale x 4 x i32> @binop_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: @binop_reverse_1(
@@ -33,6 +46,22 @@ define <vscale x 4 x i32> @binop_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x
ret <vscale x 4 x i32> %add
}
+; %a.rev has multiple uses
+define <vscale x 4 x i32> @binop_intrinsic_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @binop_intrinsic_reverse_1(
+; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
+; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
+; CHECK-NEXT: call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
+; CHECK-NEXT: [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[A_REV]], <vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[SMAX]]
+;
+ %a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+ %b.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
+ call void @use_nxv4i32(<vscale x 4 x i32> %a.rev)
+ %smax = call <vscale x 4 x i32> @llvm.smax(<vscale x 4 x i32> %a.rev, <vscale x 4 x i32> %b.rev)
+ ret <vscale x 4 x i32> %smax
+}
+
; %b.rev has multiple uses
define <vscale x 4 x i32> @binop_reverse_2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: @binop_reverse_2(
@@ -67,6 +96,24 @@ define <vscale x 4 x i32> @binop_reverse_3(<vscale x 4 x i32> %a, <vscale x 4 x
ret <vscale x 4 x i32> %add
}
+; %a.rev and %b.rev have multiple uses
+define <vscale x 4 x i32> @binop_intrinsic_reverse_3(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @binop_intrinsic_reverse_3(
+; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
+; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
+; CHECK-NEXT: call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
+; CHECK-NEXT: call void @use_nxv4i32(<vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT: [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[A_REV]], <vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[SMAX]]
+;
+ %a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+ %b.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
+ call void @use_nxv4i32(<vscale x 4 x i32> %a.rev)
+ call void @use_nxv4i32(<vscale x 4 x i32> %b.rev)
+ %smax = call <vscale x 4 x i32> @llvm.smax(<vscale x 4 x i32> %a.rev, <vscale x 4 x i32> %b.rev)
+ ret <vscale x 4 x i32> %smax
+}
+
; %a.rev used as both operands
define <vscale x 4 x i32> @binop_reverse_4(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @binop_reverse_4(
@@ -184,6 +231,17 @@ define <vscale x 4 x float> @unop_reverse_1(<vscale x 4 x float> %a) {
ret <vscale x 4 x float> %neg
}
+define <vscale x 4 x float> @unop_intrinsic_reverse(<vscale x 4 x float> %a) {
+; CHECK-LABEL: @unop_intrinsic_reverse(
+; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
+; CHECK-NEXT: [[NEG:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[NEG]]
+;
+ %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+ %abs = call <vscale x 4 x float> @llvm.fabs(<vscale x 4 x float> %a.rev)
+ ret <vscale x 4 x float> %abs
+}
+
define <vscale x 4 x i1> @icmp_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: @icmp_reverse(
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
@@ -629,6 +687,21 @@ define <vscale x 4 x float> @reverse_binop_reverse(<vscale x 4 x float> %a, <vsc
ret <vscale x 4 x float> %add.rev
}
+define <vscale x 4 x float> @reverse_binop_intrinsic_reverse(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: @reverse_binop_intrinsic_reverse(
+; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
+; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[B:%.*]])
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV]], <vscale x 4 x float> [[B_REV]])
+; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[ADD]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[MAXNUM_REV]]
+;
+ %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+ %b.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %b)
+ %maxnum = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %a.rev, <vscale x 4 x float> %b.rev)
+ %maxnum.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %maxnum)
+ ret <vscale x 4 x float> %maxnum.rev
+}
+
define <vscale x 4 x float> @reverse_binop_reverse_splat_RHS(<vscale x 4 x float> %a, float %b) {
; CHECK-LABEL: @reverse_binop_reverse_splat_RHS(
; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
@@ -659,6 +732,53 @@ define <vscale x 4 x float> @reverse_binop_reverse_splat_LHS(<vscale x 4 x float
ret <vscale x 4 x float> %div.rev
}
+define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_RHS(<vscale x 4 x float> %a, float %b) {
+; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_RHS(
+; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
+; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
+; CHECK-NEXT: [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[B_INSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV]], <vscale x 4 x float> [[B_SPLAT]])
+; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[MAXNUM]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[MAXNUM_REV]]
+;
+ %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+ %b.insert = insertelement <vscale x 4 x float> poison, float %b, i32 0
+ %b.splat = shufflevector <vscale x 4 x float> %b.insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+ %maxnum = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %a.rev, <vscale x 4 x float> %b.splat)
+ %maxnum.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %maxnum)
+ ret <vscale x 4 x float> %maxnum.rev
+}
+
+define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_LHS(<vscale x 4 x float> %a, float %b) {
+; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_LHS(
+; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
+; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
+; CHECK-NEXT: [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[B_INSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[B_SPLAT]], <vscale x 4 x float> [[A_REV]])
+; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[MAXNUM]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[MAXNUM_REV]]
+;
+ %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+ %b.insert = insertelement <vscale x 4 x float> poison, float %b, i32 0
+ %b.splat = shufflevector <vscale x 4 x float> %b.insert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+ %maxnum = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %b.splat, <vscale x 4 x float> %a.rev)
+ %maxnum.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %maxnum)
+ ret <vscale x 4 x float> %maxnum.rev
+}
+
+define <4 x float> @reverse_binop_reverse_intrinsic_constant_RHS(<4 x float> %a) {
+; CHECK-LABEL: @reverse_binop_reverse_intrinsic_constant_RHS(
+; CHECK-NEXT: [[MAXNUM_REV1:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM1:%.*]])
+; CHECK-NEXT: [[MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[MAXNUM_REV1]], <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>)
+; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM]])
+; CHECK-NEXT: ret <4 x float> [[MAXNUM_REV]]
+;
+ %a.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %a)
+ %maxnum = call <4 x float> @llvm.maxnum.v4f32(<4 x float> <float 0.0, float 1.0, float 2.0, float 3.0>, <4 x float> %a.rev)
+ %maxnum.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %maxnum)
+ ret <4 x float> %maxnum.rev
+}
+
define <vscale x 4 x i1> @reverse_fcmp_reverse(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: @reverse_fcmp_reverse(
; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast olt <vscale x 4 x float> [[A:%.*]], [[B:%.*]]
@@ -695,6 +815,31 @@ define <vscale x 4 x float> @reverse_unop_reverse(<vscale x 4 x float> %a) {
ret <vscale x 4 x float> %neg.rev
}
+define <vscale x 4 x float> @reverse_unop_intrinsic_reverse(<vscale x 4 x float> %a) {
+; CHECK-LABEL: @reverse_unop_intrinsic_reverse(
+; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV]])
+; CHECK-NEXT: [[ABS_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[ABS]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[ABS_REV]]
+;
+ %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+ %abs = call <vscale x 4 x float> @llvm.fabs(<vscale x 4 x float> %a.rev)
+ %abs.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %abs)
+ ret <vscale x 4 x float> %abs.rev
+}
+
+define <vscale x 4 x float> @reverse_unop_intrinsic_reverse_scalar_arg(<vscale x 4 x float> %a, i32 %power) {
+; CHECK-LABEL: @reverse_unop_intrinsic_reverse_scalar_arg(
+; CHECK-NEXT: [[A:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A1:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> [[A]], i32 [[POWER:%.*]])
+; CHECK-NEXT: [[POWI_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[POWI_REV]]
+;
+ %a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
+ %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32(<vscale x 4 x float> %a.rev, i32 %power)
+ %powi.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %powi)
+ ret <vscale x 4 x float> %powi.rev
+}
declare void @use_nxv4i1(<vscale x 4 x i1>)
declare void @use_nxv4i32(<vscale x 4 x i32>)
>From e9ab4347982f813989237353d3c0b4bcf28cb455 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 30 Jun 2025 17:27:41 +0100
Subject: [PATCH 2/4] [InstCombine] Pull vector reverse through intrinsics
This is the intrinsic version of #146349, and handles fabs as well as other trivially vectorizable intrinsics.
It's largely a copy of InstCombinerImpl::foldShuffledIntrinsicOperands but a bit simpler since we don't need to find a common mask.
Creating a separate function seems to be cleaner than trying to shoehorn it into the existing one.
---
.../InstCombine/InstCombineCalls.cpp | 42 +++++++++++++++
.../InstCombine/InstCombineInternal.h | 1 +
.../Transforms/InstCombine/vector-reverse.ll | 54 ++++++++-----------
3 files changed, 64 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e33d111167c04..9d07a4531d0a3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1456,6 +1456,45 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
return new ShuffleVectorInst(NewIntrinsic, Mask);
}
+/// If all arguments of the intrinsic are reverses, try to pull the reverse
+/// after the intrinsic.
+Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) {
+ if (!isTriviallyVectorizable(II->getIntrinsicID()) ||
+ !II->getCalledFunction()->isSpeculatable())
+ return nullptr;
+
+ // At least 1 operand must be a reverse with 1 use because we are creating 2
+ // instructions.
+ if (none_of(II->args(), [](Value *V) {
+ return match(V, m_OneUse(m_VecReverse(m_Value())));
+ }))
+ return nullptr;
+
+ Value *X;
+ Constant *C;
+ SmallVector<Value *> NewArgs;
+ for (Use &Arg : II->args()) {
+ if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
+ Arg.getOperandNo(), nullptr))
+ NewArgs.push_back(Arg);
+ else if (match(&Arg, m_VecReverse(m_Value(X))))
+ NewArgs.push_back(X);
+ else if (Value *Splat = getSplatValue(Arg))
+ NewArgs.push_back(Builder.CreateVectorSplat(
+ cast<VectorType>(Arg->getType())->getElementCount(), Splat));
+ else if (match(&Arg, m_ImmConstant(C)))
+ NewArgs.push_back(Builder.CreateVectorReverse(C));
+ else
+ return nullptr;
+ }
+
+ // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
+ Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
+ Instruction *NewIntrinsic = Builder.CreateIntrinsic(
+ II->getType(), II->getIntrinsicID(), NewArgs, FPI);
+ return Builder.CreateVectorReverse(NewIntrinsic);
+}
+
/// Fold the following cases and accepts bswap and bitreverse intrinsics:
/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
@@ -3867,6 +3906,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
return Shuf;
+ if (Value *Reverse = foldReversedIntrinsicOperands(II))
+ return replaceInstUsesWith(*II, Reverse);
+
// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
// context, so it is handled in visitCallBase and we should trigger it.
return visitCallBase(*II);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 8c9de862fe8f2..a8645521fe053 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -148,6 +148,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Instruction *foldItoFPtoI(CastInst &FI);
Instruction *visitSelectInst(SelectInst &SI);
Instruction *foldShuffledIntrinsicOperands(IntrinsicInst *II);
+ Value *foldReversedIntrinsicOperands(IntrinsicInst *II);
Instruction *visitCallInst(CallInst &CI);
Instruction *visitInvokeInst(InvokeInst &II);
Instruction *visitCallBrInst(CallBrInst &CBI);
diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll
index d1d8c259e7c8e..cbf6b37692cac 100644
--- a/llvm/test/Transforms/InstCombine/vector-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll
@@ -19,10 +19,9 @@ define <vscale x 4 x i32> @binop_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i3
define <vscale x 4 x i32> @binop_intrinsic_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: @binop_intrinsic_reverse(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
-; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[A_REV]], <vscale x 4 x i32> [[B_REV]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[A_REV:%.*]], <vscale x 4 x i32> [[B_REV:%.*]])
+; CHECK-NEXT: [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[SMAX]]
;
%a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
%b.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
@@ -49,10 +48,10 @@ define <vscale x 4 x i32> @binop_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x
; %a.rev has multiple uses
define <vscale x 4 x i32> @binop_intrinsic_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: @binop_intrinsic_reverse_1(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
-; CHECK-NEXT: call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
-; CHECK-NEXT: [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[A_REV]], <vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT: call void @use_nxv4i32(<vscale x 4 x i32> [[B_REV]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[B1:%.*]])
+; CHECK-NEXT: [[SMAX:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP1]])
; CHECK-NEXT: ret <vscale x 4 x i32> [[SMAX]]
;
%a.rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -233,9 +232,9 @@ define <vscale x 4 x float> @unop_reverse_1(<vscale x 4 x float> %a) {
define <vscale x 4 x float> @unop_intrinsic_reverse(<vscale x 4 x float> %a) {
; CHECK-LABEL: @unop_intrinsic_reverse(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
-; CHECK-NEXT: [[NEG:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[NEG]]
+; CHECK-NEXT: [[NEG:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[NEG]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[ABS]]
;
%a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
%abs = call <vscale x 4 x float> @llvm.fabs(<vscale x 4 x float> %a.rev)
@@ -689,11 +688,8 @@ define <vscale x 4 x float> @reverse_binop_reverse(<vscale x 4 x float> %a, <vsc
define <vscale x 4 x float> @reverse_binop_intrinsic_reverse(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: @reverse_binop_intrinsic_reverse(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
-; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[B:%.*]])
-; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV]], <vscale x 4 x float> [[B_REV]])
-; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[ADD]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[MAXNUM_REV]]
+; CHECK-NEXT: [[ADD:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]], <vscale x 4 x float> [[B_REV:%.*]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[ADD]]
;
%a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
%b.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %b)
@@ -734,12 +730,10 @@ define <vscale x 4 x float> @reverse_binop_reverse_splat_LHS(<vscale x 4 x float
define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_RHS(<vscale x 4 x float> %a, float %b) {
; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_RHS(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT: [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[B_INSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV]], <vscale x 4 x float> [[B_SPLAT]])
-; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[MAXNUM]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[MAXNUM_REV]]
+; CHECK-NEXT: [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]], <vscale x 4 x float> [[B_SPLAT]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[MAXNUM]]
;
%a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
%b.insert = insertelement <vscale x 4 x float> poison, float %b, i32 0
@@ -751,12 +745,10 @@ define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_RHS(<vscale x
define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_LHS(<vscale x 4 x float> %a, float %b) {
; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_LHS(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
; CHECK-NEXT: [[B_INSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT: [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[B_INSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[B_SPLAT]], <vscale x 4 x float> [[A_REV]])
-; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[MAXNUM]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[MAXNUM_REV]]
+; CHECK-NEXT: [[MAXNUM:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[B_SPLAT]], <vscale x 4 x float> [[A_REV:%.*]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[MAXNUM]]
;
%a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
%b.insert = insertelement <vscale x 4 x float> poison, float %b, i32 0
@@ -768,8 +760,8 @@ define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_LHS(<vscale x
define <4 x float> @reverse_binop_reverse_intrinsic_constant_RHS(<4 x float> %a) {
; CHECK-LABEL: @reverse_binop_reverse_intrinsic_constant_RHS(
-; CHECK-NEXT: [[MAXNUM_REV1:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM1:%.*]])
-; CHECK-NEXT: [[MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[MAXNUM_REV1]], <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>)
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 0.000000e+00>)
+; CHECK-NEXT: [[MAXNUM:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM]])
; CHECK-NEXT: ret <4 x float> [[MAXNUM_REV]]
;
@@ -817,10 +809,8 @@ define <vscale x 4 x float> @reverse_unop_reverse(<vscale x 4 x float> %a) {
define <vscale x 4 x float> @reverse_unop_intrinsic_reverse(<vscale x 4 x float> %a) {
; CHECK-LABEL: @reverse_unop_intrinsic_reverse(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]])
-; CHECK-NEXT: [[ABS:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV]])
-; CHECK-NEXT: [[ABS_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[ABS]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[ABS_REV]]
+; CHECK-NEXT: [[ABS:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[A_REV:%.*]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[ABS]]
;
%a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
%abs = call <vscale x 4 x float> @llvm.fabs(<vscale x 4 x float> %a.rev)
@@ -830,10 +820,8 @@ define <vscale x 4 x float> @reverse_unop_intrinsic_reverse(<vscale x 4 x float>
define <vscale x 4 x float> @reverse_unop_intrinsic_reverse_scalar_arg(<vscale x 4 x float> %a, i32 %power) {
; CHECK-LABEL: @reverse_unop_intrinsic_reverse_scalar_arg(
-; CHECK-NEXT: [[A:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[A1:%.*]])
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> [[A]], i32 [[POWER:%.*]])
-; CHECK-NEXT: [[POWI_REV:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[POWI_REV]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> [[A:%.*]], i32 [[POWER:%.*]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%a.rev = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
%powi = call <vscale x 4 x float> @llvm.powi.nxv4f32(<vscale x 4 x float> %a.rev, i32 %power)
>From 0b093262272dc809fee821116241b07265e47320 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 1 Jul 2025 10:59:58 +0100
Subject: [PATCH 3/4] Remove isSpeculatable check
---
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 9d07a4531d0a3..7dc7244c3e8aa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1459,8 +1459,7 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
/// If all arguments of the intrinsic are reverses, try to pull the reverse
/// after the intrinsic.
Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) {
- if (!isTriviallyVectorizable(II->getIntrinsicID()) ||
- !II->getCalledFunction()->isSpeculatable())
+ if (!isTriviallyVectorizable(II->getIntrinsicID()))
return nullptr;
// At least 1 operand must be a reverse with 1 use because we are creating 2
>From e0acdf38c5c3dcc021fe1a6ffe34115fb02bc321 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 1 Jul 2025 11:28:15 +0100
Subject: [PATCH 4/4] Don't create new splats, use isSplatValue
---
.../Transforms/InstCombine/InstCombineCalls.cpp | 5 ++---
llvm/test/Transforms/InstCombine/vector-reverse.ll | 14 ++++++++++++++
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 7dc7244c3e8aa..fa2a6758e98df 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1478,9 +1478,8 @@ Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) {
NewArgs.push_back(Arg);
else if (match(&Arg, m_VecReverse(m_Value(X))))
NewArgs.push_back(X);
- else if (Value *Splat = getSplatValue(Arg))
- NewArgs.push_back(Builder.CreateVectorSplat(
- cast<VectorType>(Arg->getType())->getElementCount(), Splat));
+ else if (isSplatValue(Arg))
+ NewArgs.push_back(Arg);
else if (match(&Arg, m_ImmConstant(C)))
NewArgs.push_back(Builder.CreateVectorReverse(C));
else
diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll
index cbf6b37692cac..9eb941d7b1c84 100644
--- a/llvm/test/Transforms/InstCombine/vector-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll
@@ -758,6 +758,20 @@ define <vscale x 4 x float> @reverse_binop_reverse_intrinsic_splat_LHS(<vscale x
ret <vscale x 4 x float> %maxnum.rev
}
+; Negative test: Make sure that splats with poison aren't considered splats
+define <4 x float> @reverse_binop_reverse_intrinsic_splat_with_poison(<4 x float> %a) {
+; CHECK-LABEL: @reverse_binop_reverse_intrinsic_splat_with_poison(
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> <float 1.000000e+00, float poison, float 1.000000e+00, float 1.000000e+00>)
+; CHECK-NEXT: [[MAXNUM:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[MAXNUM_REV:%.*]] = tail call <4 x float> @llvm.vector.reverse.v4f32(<4 x float> [[MAXNUM]])
+; CHECK-NEXT: ret <4 x float> [[MAXNUM_REV]]
+;
+ %a.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %a)
+ %maxnum = call <4 x float> @llvm.maxnum.v4f32(<4 x float> <float 1.0, float 1.0, float poison, float 1.0>, <4 x float> %a.rev)
+ %maxnum.rev = tail call <4 x float> @llvm.vector.reverse(<4 x float> %maxnum)
+ ret <4 x float> %maxnum.rev
+}
+
define <4 x float> @reverse_binop_reverse_intrinsic_constant_RHS(<4 x float> %a) {
; CHECK-LABEL: @reverse_binop_reverse_intrinsic_constant_RHS(
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 0.000000e+00>)
More information about the llvm-commits mailing list