[llvm] [VectorCombine] Scalarize binop-like intrinsics (PR #138095)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Sat May 3 08:05:28 PDT 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/138095
>From 1d031ea0234499e5cd42df4933508379a2e286fd Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 1 May 2025 15:32:07 +0800
Subject: [PATCH 1/9] Precommit tests
---
.../RISCV/intrinsic-scalarize.ll | 93 +++++++++++++++++++
1 file changed, 93 insertions(+)
create mode 100644 llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
new file mode 100644
index 0000000000000..55b78c4716bc0
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -p vector-combine | FileCheck %s
+
+define <4 x i32> @umax_fixed(i32 %x, i32 %y) {
+; CHECK-LABEL: define <4 x i32> @umax_fixed(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT: [[Y_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
+; CHECK-NEXT: [[V:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[X_INSERT]], <4 x i32> [[Y_INSERT]])
+; CHECK-NEXT: ret <4 x i32> [[V]]
+;
+ %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
+ %y.insert = insertelement <4 x i32> poison, i32 %y, i32 0
+ %v = call <4 x i32> @llvm.umax(<4 x i32> %x.insert, <4 x i32> %y.insert)
+ ret <4 x i32> %v
+}
+
+define <vscale x 4 x i32> @umax_scalable(i32 %x, i32 %y) {
+; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT: [[Y_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y]], i32 0
+; CHECK-NEXT: [[V:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> [[X_INSERT]], <vscale x 4 x i32> [[Y_INSERT]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[V]]
+;
+ %x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
+ %y.insert = insertelement <vscale x 4 x i32> poison, i32 %y, i32 0
+ %v = call <vscale x 4 x i32> @llvm.umax(<vscale x 4 x i32> %x.insert, <vscale x 4 x i32> %y.insert)
+ ret <vscale x 4 x i32> %v
+}
+
+define <4 x i32> @umax_fixed_lhs_const(i32 %x) {
+; CHECK-LABEL: define <4 x i32> @umax_fixed_lhs_const(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT: [[V:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> [[X_INSERT]])
+; CHECK-NEXT: ret <4 x i32> [[V]]
+;
+ %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
+ %v = call <4 x i32> @llvm.umax(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> %x.insert)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @umax_fixed_rhs_const(i32 %x) {
+; CHECK-LABEL: define <4 x i32> @umax_fixed_rhs_const(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT: [[V:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[X_INSERT]], <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
+; CHECK-NEXT: ret <4 x i32> [[V]]
+;
+ %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
+ %v = call <4 x i32> @llvm.umax(<4 x i32> %x.insert, <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
+ ret <4 x i32> %v
+}
+
+define <vscale x 4 x i32> @umax_scalable_lhs_const(i32 %x) {
+; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_lhs_const(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT: [[V:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> splat (i32 42), <vscale x 4 x i32> [[X_INSERT]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[V]]
+;
+ %x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
+ %v = call <vscale x 4 x i32> @llvm.umax(<vscale x 4 x i32> splat (i32 42), <vscale x 4 x i32> %x.insert)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @umax_scalable_rhs_const(i32 %x) {
+; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_rhs_const(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT: [[V:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> [[X_INSERT]], <vscale x 4 x i32> splat (i32 42))
+; CHECK-NEXT: ret <vscale x 4 x i32> [[V]]
+;
+ %x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
+ %v = call <vscale x 4 x i32> @llvm.umax(<vscale x 4 x i32> %x.insert, <vscale x 4 x i32> splat (i32 42))
+ ret <vscale x 4 x i32> %v
+}
+
+; Shouldn't be scalarized, not a "trivially vectorizable" intrinsic.
+define <4 x i32> @non_trivially_vectorizable(i32 %x, i32 %y) {
+; CHECK-LABEL: define <4 x i32> @non_trivially_vectorizable(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT: [[Y_INSERT:%.*]] = insertelement <8 x i32> poison, i32 [[Y]], i32 0
+; CHECK-NEXT: [[V:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v8i32(<4 x i32> [[X_INSERT]], <8 x i32> [[Y_INSERT]])
+; CHECK-NEXT: ret <4 x i32> [[V]]
+;
+ %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
+ %y.insert = insertelement <8 x i32> poison, i32 %y, i32 0
+ %v = call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %x.insert, <8 x i32> %y.insert)
+ ret <4 x i32> %v
+}
>From ebfcbe452b7657e54c4c4797b452136afb87a9b3 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 1 May 2025 15:32:38 +0800
Subject: [PATCH 2/9] [VectorCombine] Scalarize binop-like intrinsics
---
.../Transforms/Vectorize/VectorCombine.cpp | 64 ++++++++++++++-----
.../RISCV/intrinsic-scalarize.ll | 32 ++++++----
2 files changed, 66 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 04c084ffdda97..7a7c533267f6f 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -48,6 +48,7 @@ STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarBO, "Number of scalar binops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
+STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");
static cl::opt<bool> DisableVectorCombine(
"disable-vector-combine", cl::init(false), cl::Hidden,
@@ -1016,21 +1017,29 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
return true;
}
-/// Match a vector binop or compare instruction with at least one inserted
-/// scalar operand and convert to scalar binop/cmp followed by insertelement.
+/// Match a vector binop, compare or binop-like intrinsic with at least one
+/// inserted scalar operand and convert to scalar binop/cmp/intrinsic followed
+/// by insertelement.
bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
CmpPredicate Pred = CmpInst::BAD_ICMP_PREDICATE;
Value *Ins0, *Ins1;
if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
- !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
- return false;
+ !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1)))) {
+ if (auto *II = dyn_cast<IntrinsicInst>(&I);
+ II && II->arg_size() == 2 &&
+ isTriviallyVectorizable(II->getIntrinsicID())) {
+ Ins0 = II->getArgOperand(0);
+ Ins1 = II->getArgOperand(1);
+ } else {
+ return false;
+ }
+ }
// Do not convert the vector condition of a vector select into a scalar
// condition. That may cause problems for codegen because of differences in
// boolean formats and register-file transfers.
// TODO: Can we account for that in the cost model?
- bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
- if (IsCmp)
+ if (isa<CmpInst>(I))
for (User *U : I.users())
if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
return false;
@@ -1085,15 +1094,24 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
unsigned Opcode = I.getOpcode();
InstructionCost ScalarOpCost, VectorOpCost;
- if (IsCmp) {
+ if (isa<CmpInst>(I)) {
CmpInst::Predicate Pred = cast<CmpInst>(I).getPredicate();
ScalarOpCost = TTI.getCmpSelInstrCost(
Opcode, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred, CostKind);
VectorOpCost = TTI.getCmpSelInstrCost(
Opcode, VecTy, CmpInst::makeCmpResultType(VecTy), Pred, CostKind);
- } else {
+ } else if (isa<BinaryOperator>(I)) {
ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy, CostKind);
VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy, CostKind);
+ } else if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ IntrinsicCostAttributes ScalarICA(
+ II->getIntrinsicID(), ScalarTy,
+ SmallVector<Type *>(II->arg_size(), ScalarTy));
+ ScalarOpCost = TTI.getIntrinsicInstrCost(ScalarICA, CostKind);
+ IntrinsicCostAttributes VectorICA(
+ II->getIntrinsicID(), VecTy,
+ SmallVector<Type *>(II->arg_size(), VecTy));
+ VectorOpCost = TTI.getIntrinsicInstrCost(ScalarICA, CostKind);
}
// Get cost estimate for the insert element. This cost will factor into
@@ -1112,10 +1130,12 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
// vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
// inselt NewVecC, (scalar_op V0, V1), Index
- if (IsCmp)
+ if (isa<CmpInst>(I))
++NumScalarCmp;
- else
+ else if (isa<BinaryOperator>(I))
++NumScalarBO;
+ else if (isa<IntrinsicInst>(I))
+ ++NumScalarIntrinsic;
// For constant cases, extract the scalar element, this should constant fold.
if (IsConst0)
@@ -1123,9 +1143,15 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
if (IsConst1)
V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));
- Value *Scalar =
- IsCmp ? Builder.CreateCmp(Pred, V0, V1)
- : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
+ Value *Scalar;
+ if (isa<CmpInst>(I))
+ Scalar = Builder.CreateCmp(Pred, V0, V1);
+ else if (isa<BinaryOperator>(I))
+ Scalar = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
+ else if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ Scalar = Builder.CreateIntrinsic(ScalarTy, II->getIntrinsicID(), {V0, V1});
+ else
+ llvm_unreachable("Unexpected instruction type");
Scalar->setName(I.getName() + ".scalar");
@@ -1135,9 +1161,15 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
ScalarInst->copyIRFlags(&I);
// Fold the vector constants in the original vectors into a new base vector.
- Value *NewVecC =
- IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1)
- : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
+ Value *NewVecC;
+ if (isa<CmpInst>(I))
+ NewVecC = Builder.CreateCmp(Pred, VecC0, VecC1);
+ else if (isa<BinaryOperator>(I))
+ NewVecC = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
+ else if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ NewVecC = Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), {VecC0, VecC1});
+ else
+ llvm_unreachable("Unexpected instruction type");
Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
replaceValue(I, *Insert);
return true;
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
index 55b78c4716bc0..5a25f5faf8911 100644
--- a/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
@@ -4,9 +4,9 @@
define <4 x i32> @umax_fixed(i32 %x, i32 %y) {
; CHECK-LABEL: define <4 x i32> @umax_fixed(
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
-; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
-; CHECK-NEXT: [[Y_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
-; CHECK-NEXT: [[V:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[X_INSERT]], <4 x i32> [[Y_INSERT]])
+; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> poison, <4 x i32> poison)
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i32> [[V]]
;
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
@@ -18,9 +18,9 @@ define <4 x i32> @umax_fixed(i32 %x, i32 %y) {
define <vscale x 4 x i32> @umax_scalable(i32 %x, i32 %y) {
; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable(
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
-; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i32 0
-; CHECK-NEXT: [[Y_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y]], i32 0
-; CHECK-NEXT: [[V:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> [[X_INSERT]], <vscale x 4 x i32> [[Y_INSERT]])
+; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x i32> [[V]]
;
%x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
@@ -32,8 +32,9 @@ define <vscale x 4 x i32> @umax_scalable(i32 %x, i32 %y) {
define <4 x i32> @umax_fixed_lhs_const(i32 %x) {
; CHECK-LABEL: define <4 x i32> @umax_fixed_lhs_const(
; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
-; CHECK-NEXT: [[V:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> [[X_INSERT]])
+; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 1, i32 [[X]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> poison)
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i32> [[V]]
;
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
@@ -44,8 +45,9 @@ define <4 x i32> @umax_fixed_lhs_const(i32 %x) {
define <4 x i32> @umax_fixed_rhs_const(i32 %x) {
; CHECK-LABEL: define <4 x i32> @umax_fixed_rhs_const(
; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
-; CHECK-NEXT: [[V:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[X_INSERT]], <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
+; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
+; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <4 x i32> [[V]]
;
%x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
@@ -56,8 +58,9 @@ define <4 x i32> @umax_fixed_rhs_const(i32 %x) {
define <vscale x 4 x i32> @umax_scalable_lhs_const(i32 %x) {
; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_lhs_const(
; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i32 0
-; CHECK-NEXT: [[V:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> splat (i32 42), <vscale x 4 x i32> [[X_INSERT]])
+; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 42, i32 [[X]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> splat (i32 42), <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x i32> [[V]]
;
%x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
@@ -68,8 +71,9 @@ define <vscale x 4 x i32> @umax_scalable_lhs_const(i32 %x) {
define <vscale x 4 x i32> @umax_scalable_rhs_const(i32 %x) {
; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_rhs_const(
; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i32 0
-; CHECK-NEXT: [[V:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> [[X_INSERT]], <vscale x 4 x i32> splat (i32 42))
+; CHECK-NEXT: [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 42)
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> splat (i32 42))
+; CHECK-NEXT: [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
; CHECK-NEXT: ret <vscale x 4 x i32> [[V]]
;
%x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
>From 43743048ab11bfd108b377b8d6ba4f6d55472fd9 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 1 May 2025 15:50:24 +0800
Subject: [PATCH 3/9] clang-format
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 7a7c533267f6f..4f018f5af03a5 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1167,7 +1167,8 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
else if (isa<BinaryOperator>(I))
NewVecC = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
else if (auto *II = dyn_cast<IntrinsicInst>(&I))
- NewVecC = Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), {VecC0, VecC1});
+ NewVecC =
+ Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), {VecC0, VecC1});
else
llvm_unreachable("Unexpected instruction type");
Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
>From 968f0613d4960ad4bceb556a3cee90193f28d621 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 1 May 2025 17:21:14 +0800
Subject: [PATCH 4/9] Check isVectorIntrinsicWithScalarOpAtArg
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 6 +++++-
.../VectorCombine/RISCV/intrinsic-scalarize.ll | 13 +++++++++++++
2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4f018f5af03a5..345283862ec60 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1027,7 +1027,11 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
!match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1)))) {
if (auto *II = dyn_cast<IntrinsicInst>(&I);
II && II->arg_size() == 2 &&
- isTriviallyVectorizable(II->getIntrinsicID())) {
+ isTriviallyVectorizable(II->getIntrinsicID()) &&
+ none_of(index_range(0, II->arg_size()), [this, &II](size_t OpIdx) {
+ return isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), OpIdx,
+ &TTI);
+ })) {
Ins0 = II->getArgOperand(0);
Ins1 = II->getArgOperand(1);
} else {
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
index 5a25f5faf8911..e12b1ca99c6d1 100644
--- a/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
@@ -95,3 +95,16 @@ define <4 x i32> @non_trivially_vectorizable(i32 %x, i32 %y) {
%v = call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %x.insert, <8 x i32> %y.insert)
ret <4 x i32> %v
}
+
+; TODO: We should be able to scalarize this if we preserve the scalar argument.
+define <4 x float> @scalar_argument(float %x) {
+; CHECK-LABEL: define <4 x float> @scalar_argument(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i32 0
+; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[X_INSERT]], i32 42)
+; CHECK-NEXT: ret <4 x float> [[V]]
+;
+ %x.insert = insertelement <4 x float> poison, float %x, i32 0
+ %v = call <4 x float> @llvm.powi(<4 x float> %x.insert, i32 42)
+ ret <4 x float> %v
+}
>From d559e157d34c3a90921ead867f4f576e5826cb7d Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 1 May 2025 17:30:59 +0800
Subject: [PATCH 5/9] Just check all arguments have same type as return
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 9 +++++----
.../VectorCombine/RISCV/intrinsic-scalarize.ll | 12 ++++++++++++
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 345283862ec60..57a0ca80361bf 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1025,13 +1025,14 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
Value *Ins0, *Ins1;
if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
!match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1)))) {
+ // TODO: Allow unary and ternary intrinsics
+ // TODO: Allow intrinsics with different arguments types
+ // TODO: Allow intrinsics with scalar arguments
if (auto *II = dyn_cast<IntrinsicInst>(&I);
II && II->arg_size() == 2 &&
isTriviallyVectorizable(II->getIntrinsicID()) &&
- none_of(index_range(0, II->arg_size()), [this, &II](size_t OpIdx) {
- return isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), OpIdx,
- &TTI);
- })) {
+ all_of(II->args(),
+ [&II](Value *Arg) { return Arg->getType() == II->getType(); })) {
Ins0 = II->getArgOperand(0);
Ins1 = II->getArgOperand(1);
} else {
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
index e12b1ca99c6d1..e7683d72a052d 100644
--- a/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
@@ -108,3 +108,15 @@ define <4 x float> @scalar_argument(float %x) {
%v = call <4 x float> @llvm.powi(<4 x float> %x.insert, i32 42)
ret <4 x float> %v
}
+
+define <4 x i2> @scmp(i32 %x) {
+; CHECK-LABEL: define <4 x i2> @scmp(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT: [[V:%.*]] = call <4 x i2> @llvm.scmp.v4i2.v4i32(<4 x i32> [[X_INSERT]], <4 x i32> zeroinitializer)
+; CHECK-NEXT: ret <4 x i2> [[V]]
+;
+ %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
+ %v = call <4 x i2> @llvm.scmp(<4 x i32> %x.insert, <4 x i32> splat (i32 0))
+ ret <4 x i2> %v
+}
>From 3c3f7e32184e6c89919a6a59ca9c8ded35bd6b10 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 1 May 2025 17:47:43 +0800
Subject: [PATCH 6/9] Fix vector ICA type, add llvm_unreachable
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 57a0ca80361bf..3d9aac56d959b 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1116,8 +1116,9 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
IntrinsicCostAttributes VectorICA(
II->getIntrinsicID(), VecTy,
SmallVector<Type *>(II->arg_size(), VecTy));
- VectorOpCost = TTI.getIntrinsicInstrCost(ScalarICA, CostKind);
- }
+ VectorOpCost = TTI.getIntrinsicInstrCost(VectorICA, CostKind);
+ } else
+ llvm_unreachable("Unexpected instrucion type");
// Get cost estimate for the insert element. This cost will factor into
// both sequences.
>From fea2417523b0bc6bf7ddecde509f8258b23e1d72 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 1 May 2025 17:48:57 +0800
Subject: [PATCH 7/9] Fix comment typo
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 3d9aac56d959b..39dd5141b245d 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1026,7 +1026,7 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
!match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1)))) {
// TODO: Allow unary and ternary intrinsics
- // TODO: Allow intrinsics with different arguments types
+ // TODO: Allow intrinsics with different argument types
// TODO: Allow intrinsics with scalar arguments
if (auto *II = dyn_cast<IntrinsicInst>(&I);
II && II->arg_size() == 2 &&
>From fbce2ad422a90c5d5fb307e43168788ab7ac7120 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Sat, 3 May 2025 20:50:26 +0800
Subject: [PATCH 8/9] Replace llvm_unreachable with cast
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 39dd5141b245d..a2ffbdacb26d1 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1108,7 +1108,8 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
} else if (isa<BinaryOperator>(I)) {
ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy, CostKind);
VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy, CostKind);
- } else if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ } else {
+ auto *II = cast<IntrinsicInst>(&I);
IntrinsicCostAttributes ScalarICA(
II->getIntrinsicID(), ScalarTy,
SmallVector<Type *>(II->arg_size(), ScalarTy));
@@ -1117,8 +1118,7 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
II->getIntrinsicID(), VecTy,
SmallVector<Type *>(II->arg_size(), VecTy));
VectorOpCost = TTI.getIntrinsicInstrCost(VectorICA, CostKind);
- } else
- llvm_unreachable("Unexpected instrucion type");
+ }
// Get cost estimate for the insert element. This cost will factor into
// both sequences.
@@ -1154,10 +1154,9 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
Scalar = Builder.CreateCmp(Pred, V0, V1);
else if (isa<BinaryOperator>(I))
Scalar = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
- else if (auto *II = dyn_cast<IntrinsicInst>(&I))
- Scalar = Builder.CreateIntrinsic(ScalarTy, II->getIntrinsicID(), {V0, V1});
else
- llvm_unreachable("Unexpected instruction type");
+ Scalar = Builder.CreateIntrinsic(
+ ScalarTy, cast<IntrinsicInst>(I).getIntrinsicID(), {V0, V1});
Scalar->setName(I.getName() + ".scalar");
@@ -1172,11 +1171,9 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
NewVecC = Builder.CreateCmp(Pred, VecC0, VecC1);
else if (isa<BinaryOperator>(I))
NewVecC = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
- else if (auto *II = dyn_cast<IntrinsicInst>(&I))
- NewVecC =
- Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), {VecC0, VecC1});
else
- llvm_unreachable("Unexpected instruction type");
+ NewVecC = Builder.CreateIntrinsic(
+ VecTy, cast<IntrinsicInst>(I).getIntrinsicID(), {VecC0, VecC1});
Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
replaceValue(I, *Insert);
return true;
>From c2f403d0c11065d748e85a5eecf4aa0c8236702b Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Sat, 3 May 2025 23:00:52 +0800
Subject: [PATCH 9/9] Move tests out of RISC-V folder
---
.../Transforms/VectorCombine/{RISCV => }/intrinsic-scalarize.ll | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename llvm/test/Transforms/VectorCombine/{RISCV => }/intrinsic-scalarize.ll (100%)
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
similarity index 100%
rename from llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
rename to llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
More information about the llvm-commits mailing list