[llvm] 95ef1a5 - [SLP] Use the correct identity when combining binary opcodes with AND/MUL (#180457)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 12 09:34:53 PST 2026
Author: Ryan Buchner
Date: 2026-02-12T09:34:44-08:00
New Revision: 95ef1a5c3139c44bb171df225cb2b0aa17b50f1d
URL: https://github.com/llvm/llvm-project/commit/95ef1a5c3139c44bb171df225cb2b0aa17b50f1d
DIFF: https://github.com/llvm/llvm-project/commit/95ef1a5c3139c44bb171df225cb2b0aa17b50f1d.diff
LOG: [SLP] Use the correct identity when combining binary opcodes with AND/MUL (#180457)
Fixes #180456
Fixes a bug in the following SLP lowering:
```
define void @sub_mul(ptr %p, ptr %s) {
entry:
%p1 = getelementptr i16, ptr %p, i64 1
%l0 = load i16, ptr %p
%l1 = load i16, ptr %p1
%mul0 = sub i16 %l0, 0
%mul1 = mul i16 %l1, 5
%s1 = getelementptr i16, ptr %s, i64 1
store i16 %mul0, ptr %s
store i16 %mul1, ptr %s1
ret void
}
```
to
```
define void @sub_mul(ptr %p, ptr %s) {
entry:
%tmp0 = load <2 x i16>, ptr %p, align 2
%tmp1 = mul <2 x i16> %tmp0, <i16 0, i16 5> ; buggy: the `sub i16 %l0, 0` lane must use the multiplicative identity, so the fix changes the constant to <i16 1, i16 5>
store <2 x i16> %tmp1, ptr %s, align 2
ret void
}
```
Added:
llvm/test/Transforms/SLPVectorizer/semanticly-same.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8fb88a1fac0ef..324c5729c3f5d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1080,55 +1080,56 @@ class BinOpSameOpcodeHelper {
auto [CI, Pos] = isBinOpWithConstantInt(I);
const APInt &FromCIValue = CI->getValue();
unsigned FromCIValueBitWidth = FromCIValue.getBitWidth();
- APInt ToCIValue;
+ Type *RHSType = I->getOperand(Pos)->getType();
+ Constant *RHS;
switch (FromOpcode) {
case Instruction::Shl:
if (ToOpcode == Instruction::Mul) {
- ToCIValue = APInt::getOneBitSet(FromCIValueBitWidth,
- FromCIValue.getZExtValue());
+ RHS = ConstantInt::get(
+ RHSType, APInt::getOneBitSet(FromCIValueBitWidth,
+ FromCIValue.getZExtValue()));
} else {
assert(FromCIValue.isZero() && "Cannot convert the instruction.");
- ToCIValue = ToOpcode == Instruction::And
- ? APInt::getAllOnes(FromCIValueBitWidth)
- : APInt::getZero(FromCIValueBitWidth);
+ RHS = ConstantExpr::getBinOpIdentity(ToOpcode, RHSType,
+ /*AllowRHSConstant=*/true);
}
break;
case Instruction::Mul:
assert(FromCIValue.isPowerOf2() && "Cannot convert the instruction.");
if (ToOpcode == Instruction::Shl) {
- ToCIValue = APInt(FromCIValueBitWidth, FromCIValue.logBase2());
+ RHS = ConstantInt::get(
+ RHSType, APInt(FromCIValueBitWidth, FromCIValue.logBase2()));
} else {
assert(FromCIValue.isOne() && "Cannot convert the instruction.");
- ToCIValue = ToOpcode == Instruction::And
- ? APInt::getAllOnes(FromCIValueBitWidth)
- : APInt::getZero(FromCIValueBitWidth);
+ RHS = ConstantExpr::getBinOpIdentity(ToOpcode, RHSType,
+ /*AllowRHSConstant=*/true);
}
break;
case Instruction::Add:
case Instruction::Sub:
if (FromCIValue.isZero()) {
- ToCIValue = APInt::getZero(FromCIValueBitWidth);
+ RHS = ConstantExpr::getBinOpIdentity(ToOpcode, RHSType,
+ /*AllowRHSConstant=*/true);
} else {
assert(is_contained({Instruction::Add, Instruction::Sub}, ToOpcode) &&
"Cannot convert the instruction.");
- ToCIValue = FromCIValue;
- ToCIValue.negate();
+ APInt NegatedVal = APInt(FromCIValue);
+ NegatedVal.negate();
+ RHS = ConstantInt::get(RHSType, NegatedVal);
}
break;
case Instruction::And:
assert(FromCIValue.isAllOnes() && "Cannot convert the instruction.");
- ToCIValue = ToOpcode == Instruction::Mul
- ? APInt::getOneBitSet(FromCIValueBitWidth, 0)
- : APInt::getZero(FromCIValueBitWidth);
+ RHS = ConstantExpr::getBinOpIdentity(ToOpcode, RHSType,
+ /*AllowRHSConstant=*/true);
break;
default:
assert(FromCIValue.isZero() && "Cannot convert the instruction.");
- ToCIValue = APInt::getZero(FromCIValueBitWidth);
+ RHS = ConstantExpr::getBinOpIdentity(ToOpcode, RHSType,
+ /*AllowRHSConstant=*/true);
break;
}
Value *LHS = I->getOperand(1 - Pos);
- Constant *RHS =
- ConstantInt::get(I->getOperand(Pos)->getType(), ToCIValue);
// constant + x cannot be -constant - x
// instead, it should be x - -constant
if (Pos == 1 ||
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
index 03a89e54e4212..aa424b9031e77 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll
@@ -13,7 +13,7 @@ define void @test() {
; CHECK: [[BB6]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
; CHECK-NEXT: [[TMP4]] = mul <4 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll b/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
index c4ddc5d63cc04..21a93e57f6ec6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
@@ -5,7 +5,7 @@ define i16 @test(i16 %v1, i16 %v2) {
; CHECK-LABEL: define i16 @test(
; CHECK-SAME: i16 [[V1:%.*]], i16 [[V2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V2]], i32 3
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> <i16 -1, i16 -1, i16 -1, i16 poison>, i16 [[V2]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V1]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i16> [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 poison, i32 3>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll b/llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll
index f472f77c281ea..cb0fd96856262 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/copyable-child-node-used-outside.ll
@@ -10,7 +10,7 @@ define <4 x i32> @test() {
; CHECK-NEXT: [[OR:%.*]] = or i32 [[TRUNC]], 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>, i32 [[TRUNC]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0>
-; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> <i32 -1, i32 0, i32 0, i32 0>, [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> <i32 -1, i32 0, i32 -1, i32 -1>, [[TMP2]]
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[OR]] to i64
; CHECK-NEXT: br label %[[BB3:.*]]
; CHECK: [[BB3]]:
diff --git a/llvm/test/Transforms/SLPVectorizer/semanticly-same.ll b/llvm/test/Transforms/SLPVectorizer/semanticly-same.ll
new file mode 100644
index 0000000000000..c434fb5c97c07
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/semanticly-same.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-50 < %s | FileCheck %s %}
+; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -slp-threshold=-50 < %s | FileCheck %s %}
+
+; Don't care about the profitability with these tests, just want to demonstrate the ability
+; to combine opcodes
+
+define void @sub_mul(ptr %p, ptr %s) {
+; CHECK-LABEL: define void @sub_mul(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P]], align 2
+; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i16> [[TMP0]], <i16 1, i16 5, i16 2, i16 3>
+; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S]], align 2
+; CHECK-NEXT: ret void
+;
+entry:
+ %p1 = getelementptr i16, ptr %p, i64 1
+ %p2 = getelementptr i16, ptr %p, i64 2
+ %p3 = getelementptr i16, ptr %p, i64 3
+
+ %l0 = load i16, ptr %p
+ %l1 = load i16, ptr %p1
+ %l2 = load i16, ptr %p2
+ %l3 = load i16, ptr %p3
+
+ %mul0 = sub i16 %l0, 0
+ %mul1 = mul i16 %l1, 5
+ %mul2 = mul i16 %l2, 2
+ %mul3 = mul i16 %l3, 3
+
+ %s1 = getelementptr i16, ptr %s, i64 1
+ %s2 = getelementptr i16, ptr %s, i64 2
+ %s3 = getelementptr i16, ptr %s, i64 3
+
+ store i16 %mul0, ptr %s
+ store i16 %mul1, ptr %s1
+ store i16 %mul2, ptr %s2
+ store i16 %mul3, ptr %s3
+ ret void
+}
+
+define void @add_mul(ptr %p, ptr %s) {
+; CHECK-LABEL: define void @add_mul(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P]], align 2
+; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i16> [[TMP0]], <i16 1, i16 5, i16 2, i16 3>
+; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S]], align 2
+; CHECK-NEXT: ret void
+;
+entry:
+ %p1 = getelementptr i16, ptr %p, i64 1
+ %p2 = getelementptr i16, ptr %p, i64 2
+ %p3 = getelementptr i16, ptr %p, i64 3
+
+ %l0 = load i16, ptr %p
+ %l1 = load i16, ptr %p1
+ %l2 = load i16, ptr %p2
+ %l3 = load i16, ptr %p3
+
+ %mul0 = add i16 %l0, 0
+ %mul1 = mul i16 %l1, 5
+ %mul2 = mul i16 %l2, 2
+ %mul3 = mul i16 %l3, 3
+
+ %s1 = getelementptr i16, ptr %s, i64 1
+ %s2 = getelementptr i16, ptr %s, i64 2
+ %s3 = getelementptr i16, ptr %s, i64 3
+
+ store i16 %mul0, ptr %s
+ store i16 %mul1, ptr %s1
+ store i16 %mul2, ptr %s2
+ store i16 %mul3, ptr %s3
+ ret void
+}
+
+define void @sub_and(ptr %p, ptr %s) {
+; CHECK-LABEL: define void @sub_and(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[S:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P]], align 2
+; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i16> [[TMP0]], <i16 -1, i16 5, i16 2, i16 3>
+; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S]], align 2
+; CHECK-NEXT: ret void
+;
+entry:
+ %p1 = getelementptr i16, ptr %p, i64 1
+ %p2 = getelementptr i16, ptr %p, i64 2
+ %p3 = getelementptr i16, ptr %p, i64 3
+
+ %l0 = load i16, ptr %p
+ %l1 = load i16, ptr %p1
+ %l2 = load i16, ptr %p2
+ %l3 = load i16, ptr %p3
+
+ %mul0 = add i16 %l0, 0
+ %mul1 = and i16 %l1, 5
+ %mul2 = and i16 %l2, 2
+ %mul3 = and i16 %l3, 3
+
+ %s1 = getelementptr i16, ptr %s, i64 1
+ %s2 = getelementptr i16, ptr %s, i64 2
+ %s3 = getelementptr i16, ptr %s, i64 3
+
+ store i16 %mul0, ptr %s
+ store i16 %mul1, ptr %s1
+ store i16 %mul2, ptr %s2
+ store i16 %mul3, ptr %s3
+ ret void
+}
More information about the llvm-commits
mailing list