[llvm] [InstCombine] Add one-use limitation to box multiply fold (PR #72876)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 4 02:34:16 PST 2023
https://github.com/shaojingzhi updated https://github.com/llvm/llvm-project/pull/72876
>From 91c77fc4e6784a1e35f2be5e6e4d5bf7ea950827 Mon Sep 17 00:00:00 2001
From: shaojingzhi <28193696+shaojingzhi at users.noreply.github.com>
Date: Mon, 20 Nov 2023 22:09:43 +0800
Subject: [PATCH 1/5] Update InstCombineAddSub.cpp
Add a check for the case where the add/shl/mul sequence should not be folded into a single mul because the operands of the add have other users.
---
llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 90b1c133461a4..5b82c3179792f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1405,6 +1405,14 @@ static Instruction *foldBoxMultiply(BinaryOperator &I) {
// ResLo = (CrossSum << HalfBits) + (YLo * XLo)
Value *XLo, *YLo;
Value *CrossSum;
+
+ // Checking the operands of I is used in no more than one place,
+ // which can not be deleted, cause a mul instruction has far more weight than
+ // add and shl instruction in IR, thus this method cannot achieve the goal of
+ // simplifying instructions, just return null.
+ if ((!I.getOperand(0)->hasOneUser() || !I.getOperand(1)->hasOneUser()))
+ return nullptr;
+
if (!match(&I, m_c_Add(m_Shl(m_Value(CrossSum), m_SpecificInt(HalfBits)),
m_Mul(m_Value(YLo), m_Value(XLo)))))
return nullptr;
>From c6c87dbab6cba862cd0da4c6bf72b6e24fa0f613 Mon Sep 17 00:00:00 2001
From: shaojingzhi <28193696+shaojingzhi at users.noreply.github.com>
Date: Mon, 20 Nov 2023 22:12:34 +0800
Subject: [PATCH 2/5] Update mul_full_64.ll
---
llvm/test/Transforms/InstCombine/mul_full_64.ll | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/llvm/test/Transforms/InstCombine/mul_full_64.ll b/llvm/test/Transforms/InstCombine/mul_full_64.ll
index 8a57b548cd14b..5c57270fb147d 100644
--- a/llvm/test/Transforms/InstCombine/mul_full_64.ll
+++ b/llvm/test/Transforms/InstCombine/mul_full_64.ll
@@ -177,6 +177,7 @@ define i64 @mul_full_64_variant2(i64 %a, i64 %b, ptr nocapture %rhi) {
ret i64 %add27
}
+; Negative test case for mul_fold function: MUL7 is used in more than one place
define i64 @mul_full_64_variant3(i64 %a, i64 %b, ptr nocapture %rhi) {
; CHECK-LABEL: @mul_full_64_variant3(
; CHECK-NEXT: [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
@@ -196,7 +197,9 @@ define i64 @mul_full_64_variant3(i64 %a, i64 %b, ptr nocapture %rhi) {
; CHECK-NEXT: [[SHR_I:%.*]] = lshr i64 [[ADD15]], 32
; CHECK-NEXT: [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I]]
; CHECK-NEXT: store i64 [[ADD17]], ptr [[RHI:%.*]], align 8
-; CHECK-NEXT: [[ADD19:%.*]] = mul i64 [[A]], [[B]]
+; CHECK-NEXT: [[ADD18:%.*]] = add i64 [[MUL6]], [[MUL5]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[ADD18]], 32
+; CHECK-NEXT: [[ADD19:%.*]] = add i64 [[SHL]], [[MUL7]]
; CHECK-NEXT: ret i64 [[ADD19]]
;
%conv = and i64 %a, 4294967295
>From 76097e23f035d2c3e12fa20ea5edd00f1a43f86f Mon Sep 17 00:00:00 2001
From: shaojingzhi <28193696+shaojingzhi at users.noreply.github.com>
Date: Wed, 22 Nov 2023 15:45:00 +0800
Subject: [PATCH 3/5] Update
llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
Co-authored-by: Yingwei Zheng <dtcxzyw at qq.com>
---
llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 5b82c3179792f..e7e9dfdb7acbb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1410,11 +1410,8 @@ static Instruction *foldBoxMultiply(BinaryOperator &I) {
// which can not be deleted, cause a mul instruction has far more weight than
// add and shl instruction in IR, thus this method cannot achieve the goal of
// simplifying instructions, just return null.
- if ((!I.getOperand(0)->hasOneUser() || !I.getOperand(1)->hasOneUser()))
- return nullptr;
-
if (!match(&I, m_c_Add(m_Shl(m_Value(CrossSum), m_SpecificInt(HalfBits)),
- m_Mul(m_Value(YLo), m_Value(XLo)))))
+ m_OneUse(m_Mul(m_Value(YLo), m_Value(XLo))))))
return nullptr;
// XLo = X & HalfMask
>From 512db8aa2cb853f703eb493fa2fd2462c9ef9aef Mon Sep 17 00:00:00 2001
From: shaojingzhi <28193696+shaojingzhi at users.noreply.github.com>
Date: Sat, 25 Nov 2023 17:06:24 +0800
Subject: [PATCH 4/5] Update
llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
Modify annotation.
Co-authored-by: Nikita Popov <github at npopov.com>
---
llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index e7e9dfdb7acbb..7c3401e9b7a67 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1406,10 +1406,8 @@ static Instruction *foldBoxMultiply(BinaryOperator &I) {
Value *XLo, *YLo;
Value *CrossSum;
- // Checking the operands of I is used in no more than one place,
- // which can not be deleted, cause a mul instruction has far more weight than
- // add and shl instruction in IR, thus this method cannot achieve the goal of
- // simplifying instructions, just return null.
+ // Require one-use on the multiply to avoid increasing the number of
+ // multiplications.
if (!match(&I, m_c_Add(m_Shl(m_Value(CrossSum), m_SpecificInt(HalfBits)),
m_OneUse(m_Mul(m_Value(YLo), m_Value(XLo))))))
return nullptr;
>From 50d5689a93e43cba1d5d0af4423f508fa6105164 Mon Sep 17 00:00:00 2001
From: shaojingzhi <shaojingzhi98 at gmail.com>
Date: Mon, 4 Dec 2023 17:45:56 +0800
Subject: [PATCH 5/5] Add test case
Add test case to show shl does not need hasOneUse constraint
---
.../InstCombine/InstCombineAddSub.cpp | 1 -
llvm/test/Transforms/InstCombine/mul_fold.ll | 27 +++++++++++++++++++
2 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 7c3401e9b7a67..e1d65b9bfa061 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1405,7 +1405,6 @@ static Instruction *foldBoxMultiply(BinaryOperator &I) {
// ResLo = (CrossSum << HalfBits) + (YLo * XLo)
Value *XLo, *YLo;
Value *CrossSum;
-
// Require one-use on the multiply to avoid increasing the number of
// multiplications.
if (!match(&I, m_c_Add(m_Shl(m_Value(CrossSum), m_SpecificInt(HalfBits)),
diff --git a/llvm/test/Transforms/InstCombine/mul_fold.ll b/llvm/test/Transforms/InstCombine/mul_fold.ll
index d20ac6070d108..a1fdec3c68cc4 100644
--- a/llvm/test/Transforms/InstCombine/mul_fold.ll
+++ b/llvm/test/Transforms/InstCombine/mul_fold.ll
@@ -712,3 +712,30 @@ define i8 @mul8_low_miss_half_width(i8 %in0, i8 %in1) {
%retLo = add i8 %shl, %m00
ret i8 %retLo
}
+
+; Test case to show shl doesn't need hasOneUse constraint
+define i32 @mul32_low_extra_shl_use(i32 %in0, i32 %in1) {
+; CHECK-LABEL: @mul32_low_extra_shl_use(
+; CHECK-NEXT: [[IN0HI:%.*]] = lshr i32 [[IN0:%.*]], 16
+; CHECK-NEXT: [[IN1HI:%.*]] = lshr i32 [[IN1:%.*]], 16
+; CHECK-NEXT: [[M10:%.*]] = mul i32 [[IN1HI]], [[IN0]]
+; CHECK-NEXT: [[M01:%.*]] = mul i32 [[IN0HI]], [[IN1]]
+; CHECK-NEXT: [[ADDC:%.*]] = add i32 [[M10]], [[M01]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[ADDC]], 16
+; CHECK-NEXT: call void @use32(i32 [[SHL]])
+; CHECK-NEXT: [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
+; CHECK-NEXT: ret i32 [[RETLO]]
+;
+ %In0Lo = and i32 %in0, 65535
+ %In0Hi = lshr i32 %in0, 16
+ %In1Lo = and i32 %in1, 65535
+ %In1Hi = lshr i32 %in1, 16
+ %m10 = mul i32 %In1Hi, %In0Lo
+ %m01 = mul i32 %In1Lo, %In0Hi
+ %m00 = mul i32 %In1Lo, %In0Lo
+ %addc = add i32 %m10, %m01
+ %shl = shl i32 %addc, 16
+ call void @use32(i32 %shl)
+ %retLo = add i32 %shl, %m00
+ ret i32 %retLo
+}
\ No newline at end of file
More information about the llvm-commits mailing list