[llvm] [InstCombine] Transform `vector.reduce.add` and `splat` into multiplication (PR #161020)
Gábor Spaits via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 28 08:25:19 PDT 2025
https://github.com/spaits updated https://github.com/llvm/llvm-project/pull/161020
>From fa77c2c10596acec00ee517297dc92d2bee09360 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Sat, 27 Sep 2025 22:24:16 +0200
Subject: [PATCH 01/13] [InstCombine] Transform `vector.reduce.add (splat %0,
4)` into `shl i32 %0, 2`
Fixes #160066
Whenever a vector whose elements are all the same value (a splat created with
`insertelement` and `shufflevector`) is summed, and the result type's element
count is a power of two, the sum is a multiplication by a power of two, which
can be replaced with a left shift.
---
.../InstCombine/InstCombineCalls.cpp | 33 +++++++++
.../InstCombine/vector-reductions.ll | 70 +++++++++++++++++++
2 files changed, 103 insertions(+)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 6ad493772d170..49f6b86fa8f30 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3761,6 +3761,39 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(CI, Res);
}
}
+
+ // Handle the case where a value is multiplied by a power of two.
+ // For example:
+ // %2 = insertelement <4 x i32> poison, i32 %0, i64 0
+ // %3 = shufflevector <4 x i32> %2, poison, <4 x i32> zeroinitializer
+ // %4 = tail call i32 @llvm.vector.reduce.add.v4i32(%3)
+ // =>
+ // %2 = shl i32 %0, 2
+ Value *InputValue;
+ ArrayRef<int> Mask;
+ ConstantInt *InsertionIdx;
+ assert(Arg->getType()->isVectorTy() &&
+ "The vector.reduce.add intrinsic's argument must be a vector!");
+
+ if (match(Arg, m_Shuffle(m_InsertElt(m_Poison(), m_Value(InputValue),
+ m_ConstantInt(InsertionIdx)),
+ m_Poison(), m_Mask(Mask)))) {
+ // It is only a multiplication if we add the same element over and over.
+ bool AllElementsAreTheSameInMask =
+ std::all_of(Mask.begin(), Mask.end(),
+ [&Mask](int MaskElt) { return MaskElt == Mask[0]; });
+ unsigned ReducedVectorLength = Mask.size();
+
+ if (AllElementsAreTheSameInMask &&
+ InsertionIdx->getSExtValue() == Mask[0] &&
+ isPowerOf2_32(ReducedVectorLength)) {
+ unsigned Pow2 = Log2_32(ReducedVectorLength);
+ Value *Res = Builder.CreateShl(
+ InputValue, Constant::getIntegerValue(InputValue->getType(),
+ APInt(32, Pow2)));
+ return replaceInstUsesWith(CI, Res);
+ }
+ }
}
[[fallthrough]];
}
diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll
index 10f4aca72dbc7..2547403386106 100644
--- a/llvm/test/Transforms/InstCombine/vector-reductions.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll
@@ -308,3 +308,73 @@ define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) {
%r = sub i32 %r0, %r1
ret i32 %r
}
+
+define i32 @constant_multiplied_at_0(i32 %0) {
+; CHECK-LABEL: @constant_multiplied_at_0(
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 2
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %2 = insertelement <4 x i32> poison, i32 %0, i64 0
+ %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <4 x i32> zeroinitializer
+ %4 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %3)
+ ret i32 %4
+}
+
+define i32 @constant_multiplied_at_0_two_pow8(i32 %0) {
+; CHECK-LABEL: @constant_multiplied_at_0_two_pow8(
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 3
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %2 = insertelement <4 x i32> poison, i32 %0, i64 0
+ %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <8 x i32> zeroinitializer
+ %4 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %3)
+ ret i32 %4
+}
+
+
+define i32 @constant_multiplied_at_0_two_pow16(i32 %0) {
+; CHECK-LABEL: @constant_multiplied_at_0_two_pow16(
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 4
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %2 = insertelement <4 x i32> poison, i32 %0, i64 0
+ %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <16 x i32> zeroinitializer
+ %4 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %3)
+ ret i32 %4
+}
+
+
+define i32 @constant_multiplied_at_1(i32 %0) {
+; CHECK-LABEL: @constant_multiplied_at_1(
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 2
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %2 = insertelement <4 x i32> poison, i32 %0, i64 1
+ %3 = shufflevector <4 x i32> %2, <4 x i32> poison,
+ <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %4 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %3)
+ ret i32 %4
+}
+
+define i32 @negative_constant_multiplied_at_1(i32 %0) {
+; CHECK-LABEL: @negative_constant_multiplied_at_1(
+; CHECK-NEXT: ret i32 poison
+;
+ %2 = insertelement <4 x i32> poison, i32 %0, i64 1
+ %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <4 x i32> zeroinitializer
+ %4 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %3)
+ ret i32 %4
+}
+
+define i32 @negative_constant_multiplied_non_power_of_2(i32 %0) {
+; CHECK-LABEL: @negative_constant_multiplied_non_power_of_2(
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <6 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.vector.reduce.add.v6i32(<6 x i32> [[TMP3]])
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %2 = insertelement <4 x i32> poison, i32 %0, i64 0
+ %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <6 x i32> zeroinitializer
+ %4 = tail call i32 @llvm.vector.reduce.add.v6i32(<6 x i32> %3)
+ ret i32 %4
+}
>From fb492be297083c777d3a856d0e8248099d8667df Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Sat, 27 Sep 2025 23:17:09 +0200
Subject: [PATCH 02/13] Use getSplatValue and correctly construct APInt and add
i64 test
---
.../InstCombine/InstCombineCalls.cpp | 34 ++++++++-----------
.../InstCombine/vector-reductions.ll | 11 ++++++
2 files changed, 26 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 49f6b86fa8f30..4ca985c2e959d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -64,6 +64,7 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/KnownFPClass.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
@@ -3769,29 +3770,24 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// %4 = tail call i32 @llvm.vector.reduce.add.v4i32(%3)
// =>
// %2 = shl i32 %0, 2
- Value *InputValue;
- ArrayRef<int> Mask;
- ConstantInt *InsertionIdx;
assert(Arg->getType()->isVectorTy() &&
"The vector.reduce.add intrinsic's argument must be a vector!");
- if (match(Arg, m_Shuffle(m_InsertElt(m_Poison(), m_Value(InputValue),
- m_ConstantInt(InsertionIdx)),
- m_Poison(), m_Mask(Mask)))) {
+ if (Value *Splat = getSplatValue(Arg)) {
// It is only a multiplication if we add the same element over and over.
- bool AllElementsAreTheSameInMask =
- std::all_of(Mask.begin(), Mask.end(),
- [&Mask](int MaskElt) { return MaskElt == Mask[0]; });
- unsigned ReducedVectorLength = Mask.size();
-
- if (AllElementsAreTheSameInMask &&
- InsertionIdx->getSExtValue() == Mask[0] &&
- isPowerOf2_32(ReducedVectorLength)) {
- unsigned Pow2 = Log2_32(ReducedVectorLength);
- Value *Res = Builder.CreateShl(
- InputValue, Constant::getIntegerValue(InputValue->getType(),
- APInt(32, Pow2)));
- return replaceInstUsesWith(CI, Res);
+ ElementCount ReducedVectorElementCount =
+ static_cast<VectorType *>(Arg->getType())->getElementCount();
+ if (ReducedVectorElementCount.isFixed()) {
+ unsigned VectorSize = ReducedVectorElementCount.getFixedValue();
+ if (isPowerOf2_32(VectorSize)) {
+ unsigned Pow2 = Log2_32(VectorSize);
+ Value *Res = Builder.CreateShl(
+ Splat,
+ Constant::getIntegerValue(
+ Splat->getType(),
+ APInt(Splat->getType()->getIntegerBitWidth(), Pow2)));
+ return replaceInstUsesWith(CI, Res);
+ }
}
}
}
diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll
index 2547403386106..d62dce01ac193 100644
--- a/llvm/test/Transforms/InstCombine/vector-reductions.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll
@@ -320,6 +320,17 @@ define i32 @constant_multiplied_at_0(i32 %0) {
ret i32 %4
}
+define i64 @constant_multiplied_at_0_64bits(i64 %0) {
+; CHECK-LABEL: @constant_multiplied_at_0_64bits(
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 2
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %2 = insertelement <4 x i64> poison, i64 %0, i64 0
+ %3 = shufflevector <4 x i64> %2, <4 x i64> poison, <4 x i32> zeroinitializer
+ %4 = tail call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %3)
+ ret i64 %4
+}
+
define i32 @constant_multiplied_at_0_two_pow8(i32 %0) {
; CHECK-LABEL: @constant_multiplied_at_0_two_pow8(
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 3
>From e9cc989038328391c207b413487b6c09feaa4611 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Sat, 27 Sep 2025 23:26:42 +0200
Subject: [PATCH 03/13] Address non power of 2 cases
---
.../InstCombine/InstCombineCalls.cpp | 17 ++++++++++-----
.../InstCombine/vector-reductions.ll | 21 +++++++++++++------
2 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 4ca985c2e959d..74c263e86f4a4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3779,15 +3779,22 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
static_cast<VectorType *>(Arg->getType())->getElementCount();
if (ReducedVectorElementCount.isFixed()) {
unsigned VectorSize = ReducedVectorElementCount.getFixedValue();
+ Type *SplatType = Splat->getType();
+ unsigned SplatTypeWidth = SplatType->getIntegerBitWidth();
+ Value *Res;
+ // Power of two is a special case. We can just use a left shif here.
if (isPowerOf2_32(VectorSize)) {
unsigned Pow2 = Log2_32(VectorSize);
- Value *Res = Builder.CreateShl(
- Splat,
- Constant::getIntegerValue(
- Splat->getType(),
- APInt(Splat->getType()->getIntegerBitWidth(), Pow2)));
+ Res = Builder.CreateShl(
+ Splat, Constant::getIntegerValue(SplatType,
+ APInt(SplatTypeWidth, Pow2)));
return replaceInstUsesWith(CI, Res);
}
+ // Otherwise just multiply.
+ Res = Builder.CreateMul(
+ Splat, Constant::getIntegerValue(
+ SplatType, APInt(SplatTypeWidth, VectorSize)));
+ return replaceInstUsesWith(CI, Res);
}
}
}
diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll
index d62dce01ac193..e071415d2d6c1 100644
--- a/llvm/test/Transforms/InstCombine/vector-reductions.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll
@@ -377,15 +377,24 @@ define i32 @negative_constant_multiplied_at_1(i32 %0) {
ret i32 %4
}
-define i32 @negative_constant_multiplied_non_power_of_2(i32 %0) {
-; CHECK-LABEL: @negative_constant_multiplied_non_power_of_2(
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0:%.*]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <6 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.vector.reduce.add.v6i32(<6 x i32> [[TMP3]])
-; CHECK-NEXT: ret i32 [[TMP4]]
+define i32 @constant_multiplied_non_power_of_2(i32 %0) {
+; CHECK-LABEL: @constant_multiplied_non_power_of_2(
+; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0:%.*]], 6
+; CHECK-NEXT: ret i32 [[TMP2]]
;
%2 = insertelement <4 x i32> poison, i32 %0, i64 0
%3 = shufflevector <4 x i32> %2, <4 x i32> poison, <6 x i32> zeroinitializer
%4 = tail call i32 @llvm.vector.reduce.add.v6i32(<6 x i32> %3)
ret i32 %4
}
+
+define i64 @constant_multiplied_non_power_of_2_i64(i64 %0) {
+; CHECK-LABEL: @constant_multiplied_non_power_of_2_i64(
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0:%.*]], 6
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %2 = insertelement <4 x i64> poison, i64 %0, i64 0
+ %3 = shufflevector <4 x i64> %2, <4 x i64> poison, <6 x i32> zeroinitializer
+ %4 = tail call i64 @llvm.vector.reduce.add.v6i64(<6 x i64> %3)
+ ret i64 %4
+}
>From a8b32afe1930b3f922676ad12cc0d40f4c08fd31 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Sun, 28 Sep 2025 08:55:10 +0200
Subject: [PATCH 04/13] Update comments and move assertion to a more fitting
place
---
.../Transforms/InstCombine/InstCombineCalls.cpp | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 74c263e86f4a4..4bb9ba39318aa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3763,18 +3763,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
}
- // Handle the case where a value is multiplied by a power of two.
- // For example:
- // %2 = insertelement <4 x i32> poison, i32 %0, i64 0
- // %3 = shufflevector <4 x i32> %2, poison, <4 x i32> zeroinitializer
- // %4 = tail call i32 @llvm.vector.reduce.add.v4i32(%3)
+ // Handle the case where a splat is summarized. In that case we have a
+ // multpilication. For example: %2 = insertelement <4 x i32> poison, i32
+ // %0, i64 0 %3 = shufflevector <4 x i32> %2, poison, <4 x i32>
+ // zeroinitializer %4 = tail call i32 @llvm.vector.reduce.add.v4i32(%3)
// =>
// %2 = shl i32 %0, 2
- assert(Arg->getType()->isVectorTy() &&
- "The vector.reduce.add intrinsic's argument must be a vector!");
-
if (Value *Splat = getSplatValue(Arg)) {
// It is only a multiplication if we add the same element over and over.
+ assert(Arg->getType()->isVectorTy() &&
+ "The vector.reduce.add intrinsic's argument must be a vector!");
ElementCount ReducedVectorElementCount =
static_cast<VectorType *>(Arg->getType())->getElementCount();
if (ReducedVectorElementCount.isFixed()) {
>From d11a108e80b763a537663e925b1cfdb05caa61ad Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Sun, 28 Sep 2025 11:46:51 +0200
Subject: [PATCH 05/13] Remove redundant power of 2 case
---
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 12 +-----------
1 file changed, 1 insertion(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 4bb9ba39318aa..d745598f8ffd9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3779,17 +3779,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
unsigned VectorSize = ReducedVectorElementCount.getFixedValue();
Type *SplatType = Splat->getType();
unsigned SplatTypeWidth = SplatType->getIntegerBitWidth();
- Value *Res;
- // Power of two is a special case. We can just use a left shif here.
- if (isPowerOf2_32(VectorSize)) {
- unsigned Pow2 = Log2_32(VectorSize);
- Res = Builder.CreateShl(
- Splat, Constant::getIntegerValue(SplatType,
- APInt(SplatTypeWidth, Pow2)));
- return replaceInstUsesWith(CI, Res);
- }
- // Otherwise just multiply.
- Res = Builder.CreateMul(
+ Value *Res = Builder.CreateMul(
Splat, Constant::getIntegerValue(
SplatType, APInt(SplatTypeWidth, VectorSize)));
return replaceInstUsesWith(CI, Res);
>From 01eb5719465b322ba26fb77cbf2d79bbd1ff6b85 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Sun, 28 Sep 2025 11:54:43 +0200
Subject: [PATCH 06/13] Use ConstantInt::get instead of
Constant::getIntegerValue
---
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d745598f8ffd9..a21f9f75e4bcc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3768,7 +3768,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// %0, i64 0 %3 = shufflevector <4 x i32> %2, poison, <4 x i32>
// zeroinitializer %4 = tail call i32 @llvm.vector.reduce.add.v4i32(%3)
// =>
- // %2 = shl i32 %0, 2
+ // %2 = mul i32 %0, 4
if (Value *Splat = getSplatValue(Arg)) {
// It is only a multiplication if we add the same element over and over.
assert(Arg->getType()->isVectorTy() &&
@@ -3778,10 +3778,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (ReducedVectorElementCount.isFixed()) {
unsigned VectorSize = ReducedVectorElementCount.getFixedValue();
Type *SplatType = Splat->getType();
- unsigned SplatTypeWidth = SplatType->getIntegerBitWidth();
- Value *Res = Builder.CreateMul(
- Splat, Constant::getIntegerValue(
- SplatType, APInt(SplatTypeWidth, VectorSize)));
+ Value *Res =
+ Builder.CreateMul(Splat, ConstantInt::get(SplatType, VectorSize));
return replaceInstUsesWith(CI, Res);
}
}
>From 0adef1dd16dc8c8530696557133e8d4da8c8df42 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Sun, 28 Sep 2025 13:33:23 +0200
Subject: [PATCH 07/13] Add i1 test
---
.../Transforms/InstCombine/vector-reductions.ll | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll
index e071415d2d6c1..75948a2575d42 100644
--- a/llvm/test/Transforms/InstCombine/vector-reductions.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll
@@ -398,3 +398,18 @@ define i64 @constant_multiplied_non_power_of_2_i64(i64 %0) {
%4 = tail call i64 @llvm.vector.reduce.add.v6i64(<6 x i64> %3)
ret i64 %4
}
+
+define i1 @constant_multiplied_non_power_of_2_i1(i1 %0) {
+; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1(
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> poison, i1 [[TMP0:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP6]], <8 x i1> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[TMP4]])
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i8 [[TMP5]] to i1
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %2 = insertelement <8 x i1> poison, i1 %0, i32 0
+ %3 = shufflevector <8 x i1> %2, <8 x i1> poison, <8 x i32> zeroinitializer
+ %4 = tail call i1 @llvm.vector.reduce.add.v6i1(<8 x i1> %3)
+ ret i1 %4
+}
>From d2f235e2f6d8b5368fc7ec7e864cbe977489cba5 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Sun, 28 Sep 2025 13:54:51 +0200
Subject: [PATCH 08/13] More small type tests
---
.../InstCombine/vector-reductions.ll | 42 ++++++++++++++++++-
1 file changed, 41 insertions(+), 1 deletion(-)
diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll
index 75948a2575d42..d786363075c1a 100644
--- a/llvm/test/Transforms/InstCombine/vector-reductions.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll
@@ -410,6 +410,46 @@ define i1 @constant_multiplied_non_power_of_2_i1(i1 %0) {
;
%2 = insertelement <8 x i1> poison, i1 %0, i32 0
%3 = shufflevector <8 x i1> %2, <8 x i1> poison, <8 x i32> zeroinitializer
- %4 = tail call i1 @llvm.vector.reduce.add.v6i1(<8 x i1> %3)
+ %4 = tail call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> %3)
ret i1 %4
}
+
+define i1 @constant_multiplied_non_power_of_2_i1x4(i1 %0) {
+; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1x4(
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> poison, i1 [[TMP0:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i4 [[TMP5]] to i1
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %2 = insertelement <4 x i1> poison, i1 %0, i32 0
+ %3 = shufflevector <4 x i1> %2, <4 x i1> poison, <4 x i32> zeroinitializer
+ %4 = tail call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %3)
+ ret i1 %4
+}
+
+define i1 @constant_multiplied_non_power_of_2_i1x2(i1 %0) {
+; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1x2(
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i1> poison, i1 [[TMP0:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i1> [[TMP3]] to i2
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i2 0, -1) i2 @llvm.ctpop.i2(i2 [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i2 [[TMP5]] to i1
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %2 = insertelement <2 x i1> poison, i1 %0, i32 0
+ %3 = shufflevector <2 x i1> %2, <2 x i1> poison, <2 x i32> zeroinitializer
+ %4 = tail call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> %3)
+ ret i1 %4
+}
+
+define i2 @constant_multiplied_non_power_of_2_i2x4(i2 %0) {
+; CHECK-LABEL: @constant_multiplied_non_power_of_2_i2x4(
+; CHECK-NEXT: ret i2 0
+;
+ %2 = insertelement <4 x i2> poison, i2 %0, i32 0
+ %3 = shufflevector <4 x i2> %2, <4 x i2> poison, <4 x i32> zeroinitializer
+ %4 = tail call i2 @llvm.vector.reduce.add.v4i2(<4 x i2> %3)
+ ret i2 %4
+}
>From 045f0efae1d6e6efa9fcc8456c1965a005d3c951 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Sun, 28 Sep 2025 17:01:39 +0200
Subject: [PATCH 09/13] Throw out redundant i1 tests
---
.../InstCombine/vector-reductions.ll | 30 -------------------
1 file changed, 30 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll
index d786363075c1a..30f9e49f9fe10 100644
--- a/llvm/test/Transforms/InstCombine/vector-reductions.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll
@@ -414,36 +414,6 @@ define i1 @constant_multiplied_non_power_of_2_i1(i1 %0) {
ret i1 %4
}
-define i1 @constant_multiplied_non_power_of_2_i1x4(i1 %0) {
-; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1x4(
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> poison, i1 [[TMP0:%.*]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
-; CHECK-NEXT: [[TMP5:%.*]] = call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 [[TMP4]])
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i4 [[TMP5]] to i1
-; CHECK-NEXT: ret i1 [[TMP6]]
-;
- %2 = insertelement <4 x i1> poison, i1 %0, i32 0
- %3 = shufflevector <4 x i1> %2, <4 x i1> poison, <4 x i32> zeroinitializer
- %4 = tail call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %3)
- ret i1 %4
-}
-
-define i1 @constant_multiplied_non_power_of_2_i1x2(i1 %0) {
-; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1x2(
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i1> poison, i1 [[TMP0:%.*]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i1> [[TMP3]] to i2
-; CHECK-NEXT: [[TMP5:%.*]] = call range(i2 0, -1) i2 @llvm.ctpop.i2(i2 [[TMP4]])
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i2 [[TMP5]] to i1
-; CHECK-NEXT: ret i1 [[TMP6]]
-;
- %2 = insertelement <2 x i1> poison, i1 %0, i32 0
- %3 = shufflevector <2 x i1> %2, <2 x i1> poison, <2 x i32> zeroinitializer
- %4 = tail call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> %3)
- ret i1 %4
-}
-
define i2 @constant_multiplied_non_power_of_2_i2x4(i2 %0) {
; CHECK-LABEL: @constant_multiplied_non_power_of_2_i2x4(
; CHECK-NEXT: ret i2 0
>From 027efe7c0bfb0a377acf0ef4b71bc46124f13fb5 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Sun, 28 Sep 2025 17:06:10 +0200
Subject: [PATCH 10/13] More consistent test naming
---
.../InstCombine/vector-reductions.ll | 48 +++++++++----------
1 file changed, 24 insertions(+), 24 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll
index 30f9e49f9fe10..5a2e3e73978a4 100644
--- a/llvm/test/Transforms/InstCombine/vector-reductions.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll
@@ -309,8 +309,8 @@ define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) {
ret i32 %r
}
-define i32 @constant_multiplied_at_0(i32 %0) {
-; CHECK-LABEL: @constant_multiplied_at_0(
+define i32 @constant_multiplied_4xi32(i32 %0) {
+; CHECK-LABEL: @constant_multiplied_4xi32(
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 2
; CHECK-NEXT: ret i32 [[TMP2]]
;
@@ -320,8 +320,8 @@ define i32 @constant_multiplied_at_0(i32 %0) {
ret i32 %4
}
-define i64 @constant_multiplied_at_0_64bits(i64 %0) {
-; CHECK-LABEL: @constant_multiplied_at_0_64bits(
+define i64 @constant_multiplied_4xi64(i64 %0) {
+; CHECK-LABEL: @constant_multiplied_4xi64(
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 2
; CHECK-NEXT: ret i64 [[TMP2]]
;
@@ -331,8 +331,8 @@ define i64 @constant_multiplied_at_0_64bits(i64 %0) {
ret i64 %4
}
-define i32 @constant_multiplied_at_0_two_pow8(i32 %0) {
-; CHECK-LABEL: @constant_multiplied_at_0_two_pow8(
+define i32 @constant_multiplied_8xi32(i32 %0) {
+; CHECK-LABEL: @constant_multiplied_8xi32(
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 3
; CHECK-NEXT: ret i32 [[TMP2]]
;
@@ -343,8 +343,8 @@ define i32 @constant_multiplied_at_0_two_pow8(i32 %0) {
}
-define i32 @constant_multiplied_at_0_two_pow16(i32 %0) {
-; CHECK-LABEL: @constant_multiplied_at_0_two_pow16(
+define i32 @constant_multiplied_16xi32(i32 %0) {
+; CHECK-LABEL: @constant_multiplied_16xi32(
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 4
; CHECK-NEXT: ret i32 [[TMP2]]
;
@@ -355,8 +355,8 @@ define i32 @constant_multiplied_at_0_two_pow16(i32 %0) {
}
-define i32 @constant_multiplied_at_1(i32 %0) {
-; CHECK-LABEL: @constant_multiplied_at_1(
+define i32 @constant_multiplied_4xi32_at_idx1(i32 %0) {
+; CHECK-LABEL: @constant_multiplied_4xi32_at_idx1(
; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 2
; CHECK-NEXT: ret i32 [[TMP2]]
;
@@ -367,8 +367,8 @@ define i32 @constant_multiplied_at_1(i32 %0) {
ret i32 %4
}
-define i32 @negative_constant_multiplied_at_1(i32 %0) {
-; CHECK-LABEL: @negative_constant_multiplied_at_1(
+define i32 @negative_constant_multiplied_4xi32(i32 %0) {
+; CHECK-LABEL: @negative_constant_multiplied_4xi32(
; CHECK-NEXT: ret i32 poison
;
%2 = insertelement <4 x i32> poison, i32 %0, i64 1
@@ -377,8 +377,8 @@ define i32 @negative_constant_multiplied_at_1(i32 %0) {
ret i32 %4
}
-define i32 @constant_multiplied_non_power_of_2(i32 %0) {
-; CHECK-LABEL: @constant_multiplied_non_power_of_2(
+define i32 @constant_multiplied_6xi32(i32 %0) {
+; CHECK-LABEL: @constant_multiplied_6xi32(
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0:%.*]], 6
; CHECK-NEXT: ret i32 [[TMP2]]
;
@@ -388,8 +388,8 @@ define i32 @constant_multiplied_non_power_of_2(i32 %0) {
ret i32 %4
}
-define i64 @constant_multiplied_non_power_of_2_i64(i64 %0) {
-; CHECK-LABEL: @constant_multiplied_non_power_of_2_i64(
+define i64 @constant_multiplied_6xi64(i64 %0) {
+; CHECK-LABEL: @constant_multiplied_6xi64(
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0:%.*]], 6
; CHECK-NEXT: ret i64 [[TMP2]]
;
@@ -399,14 +399,14 @@ define i64 @constant_multiplied_non_power_of_2_i64(i64 %0) {
ret i64 %4
}
-define i1 @constant_multiplied_non_power_of_2_i1(i1 %0) {
-; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1(
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> poison, i1 [[TMP0:%.*]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP6]], <8 x i1> poison, <8 x i32> zeroinitializer
+define i1 @constant_multiplied_8xi1(i1 %0) {
+; CHECK-LABEL: @constant_multiplied_8xi1(
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i1> poison, i1 [[TMP0:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
; CHECK-NEXT: [[TMP5:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[TMP4]])
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i8 [[TMP5]] to i1
-; CHECK-NEXT: ret i1 [[TMP2]]
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i8 [[TMP5]] to i1
+; CHECK-NEXT: ret i1 [[TMP6]]
;
%2 = insertelement <8 x i1> poison, i1 %0, i32 0
%3 = shufflevector <8 x i1> %2, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -414,8 +414,8 @@ define i1 @constant_multiplied_non_power_of_2_i1(i1 %0) {
ret i1 %4
}
-define i2 @constant_multiplied_non_power_of_2_i2x4(i2 %0) {
-; CHECK-LABEL: @constant_multiplied_non_power_of_2_i2x4(
+define i2 @constant_multiplied_4xi2(i2 %0) {
+; CHECK-LABEL: @constant_multiplied_4xi2(
; CHECK-NEXT: ret i2 0
;
%2 = insertelement <4 x i2> poison, i2 %0, i32 0
>From 8e2c2e57ac049abd152a836f4a2c5da36b8dedb9 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Sun, 28 Sep 2025 17:07:46 +0200
Subject: [PATCH 11/13] Use cast instead of static_cast
---
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a21f9f75e4bcc..4de8758d170fa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3774,7 +3774,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
assert(Arg->getType()->isVectorTy() &&
"The vector.reduce.add intrinsic's argument must be a vector!");
ElementCount ReducedVectorElementCount =
- static_cast<VectorType *>(Arg->getType())->getElementCount();
+ cast<VectorType>(Arg->getType())->getElementCount();
if (ReducedVectorElementCount.isFixed()) {
unsigned VectorSize = ReducedVectorElementCount.getFixedValue();
Type *SplatType = Splat->getType();
>From ff6491be34198bb5a1a5c99d92a9bc146cb96a73 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Sun, 28 Sep 2025 17:09:48 +0200
Subject: [PATCH 12/13] Use BinaryOperator::CreateMul instead of using Builder
and replaceInstUsesWith
---
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 4de8758d170fa..bfc7ed69d8b79 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3778,9 +3778,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (ReducedVectorElementCount.isFixed()) {
unsigned VectorSize = ReducedVectorElementCount.getFixedValue();
Type *SplatType = Splat->getType();
- Value *Res =
- Builder.CreateMul(Splat, ConstantInt::get(SplatType, VectorSize));
- return replaceInstUsesWith(CI, Res);
+ return BinaryOperator::CreateMul(
+ Splat, ConstantInt::get(SplatType, VectorSize));
}
}
}
>From 38ca5ce0a39f0a9aa0c27b7e0fc66a937eaaa306 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Sun, 28 Sep 2025 17:17:38 +0200
Subject: [PATCH 13/13] Extend testing
---
.../InstCombine/vector-reductions.ll | 43 +++++++++++++++++++
1 file changed, 43 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll
index 5a2e3e73978a4..4355772fd23c1 100644
--- a/llvm/test/Transforms/InstCombine/vector-reductions.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll
@@ -320,6 +320,17 @@ define i32 @constant_multiplied_4xi32(i32 %0) {
ret i32 %4
}
+define i32 @constant_multiplied_3xi32(i32 %0) {
+; CHECK-LABEL: @constant_multiplied_3xi32(
+; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0:%.*]], 3
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %2 = insertelement <3 x i32> poison, i32 %0, i64 0
+ %3 = shufflevector <3 x i32> %2, <3 x i32> poison, <3 x i32> zeroinitializer
+ %4 = tail call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %3)
+ ret i32 %4
+}
+
define i64 @constant_multiplied_4xi64(i64 %0) {
; CHECK-LABEL: @constant_multiplied_4xi64(
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 2
@@ -423,3 +434,35 @@ define i2 @constant_multiplied_4xi2(i2 %0) {
%4 = tail call i2 @llvm.vector.reduce.add.v4i2(<4 x i2> %3)
ret i2 %4
}
+
+define i2 @constant_multiplied_5xi2(i2 %0) {
+; CHECK-LABEL: @constant_multiplied_5xi2(
+; CHECK-NEXT: ret i2 [[TMP0:%.*]]
+;
+ %2 = insertelement <5 x i2> poison, i2 %0, i64 0
+ %3 = shufflevector <5 x i2> %2, <5 x i2> poison, <5 x i32> zeroinitializer
+ %4 = tail call i2 @llvm.vector.reduce.add.v5i2(<5 x i2> %3)
+ ret i2 %4
+}
+
+define i2 @constant_multiplied_6xi2(i2 %0) {
+; CHECK-LABEL: @constant_multiplied_6xi2(
+; CHECK-NEXT: [[TMP2:%.*]] = shl i2 [[TMP0:%.*]], 1
+; CHECK-NEXT: ret i2 [[TMP2]]
+;
+ %2 = insertelement <6 x i2> poison, i2 %0, i64 0
+ %3 = shufflevector <6 x i2> %2, <6 x i2> poison, <6 x i32> zeroinitializer
+ %4 = tail call i2 @llvm.vector.reduce.add.v6i2(<6 x i2> %3)
+ ret i2 %4
+}
+
+define i2 @constant_multiplied_7xi2(i2 %0) {
+; CHECK-LABEL: @constant_multiplied_7xi2(
+; CHECK-NEXT: [[TMP2:%.*]] = sub i2 0, [[TMP0:%.*]]
+; CHECK-NEXT: ret i2 [[TMP2]]
+;
+ %2 = insertelement <7 x i2> poison, i2 %0, i64 0
+ %3 = shufflevector <7 x i2> %2, <7 x i2> poison, <7 x i32> zeroinitializer
+ %4 = tail call i2 @llvm.vector.reduce.add.v7i2(<7 x i2> %3)
+ ret i2 %4
+}
More information about the llvm-commits
mailing list