[llvm] 9a99a1a - [InstCombine] Add one-use limitation to box multiply fold (#72876)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 4 05:15:03 PST 2023
Author: shaojingzhi
Date: 2023-12-04T14:14:59+01:00
New Revision: 9a99a1a39e1d067abb9a6cc0d53e7708d6c49995
URL: https://github.com/llvm/llvm-project/commit/9a99a1a39e1d067abb9a6cc0d53e7708d6c49995
DIFF: https://github.com/llvm/llvm-project/commit/9a99a1a39e1d067abb9a6cc0d53e7708d6c49995.diff
LOG: [InstCombine] Add one-use limitation to box multiply fold (#72876)
Check that the multiply operand of I has no other uses. If it does, it
cannot be deleted after the fold, and because a mul instruction is far
more expensive than add and shl instructions in IR, the fold would not
achieve its goal of simplifying the code; in that case, just return null.
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
llvm/test/Transforms/InstCombine/mul_fold.ll
llvm/test/Transforms/InstCombine/mul_full_64.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3604abb8e5277..427558f309056 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1415,8 +1415,10 @@ static Instruction *foldBoxMultiply(BinaryOperator &I) {
// ResLo = (CrossSum << HalfBits) + (YLo * XLo)
Value *XLo, *YLo;
Value *CrossSum;
+ // Require one-use on the multiply to avoid increasing the number of
+ // multiplications.
if (!match(&I, m_c_Add(m_Shl(m_Value(CrossSum), m_SpecificInt(HalfBits)),
- m_Mul(m_Value(YLo), m_Value(XLo)))))
+ m_OneUse(m_Mul(m_Value(YLo), m_Value(XLo))))))
return nullptr;
// XLo = X & HalfMask
diff --git a/llvm/test/Transforms/InstCombine/mul_fold.ll b/llvm/test/Transforms/InstCombine/mul_fold.ll
index d20ac6070d108..a1fdec3c68cc4 100644
--- a/llvm/test/Transforms/InstCombine/mul_fold.ll
+++ b/llvm/test/Transforms/InstCombine/mul_fold.ll
@@ -712,3 +712,30 @@ define i8 @mul8_low_miss_half_width(i8 %in0, i8 %in1) {
%retLo = add i8 %shl, %m00
ret i8 %retLo
}
+
+; Test case to show shl doesn't need hasOneUse constraint
+define i32 @mul32_low_extra_shl_use(i32 %in0, i32 %in1) {
+; CHECK-LABEL: @mul32_low_extra_shl_use(
+; CHECK-NEXT: [[IN0HI:%.*]] = lshr i32 [[IN0:%.*]], 16
+; CHECK-NEXT: [[IN1HI:%.*]] = lshr i32 [[IN1:%.*]], 16
+; CHECK-NEXT: [[M10:%.*]] = mul i32 [[IN1HI]], [[IN0]]
+; CHECK-NEXT: [[M01:%.*]] = mul i32 [[IN0HI]], [[IN1]]
+; CHECK-NEXT: [[ADDC:%.*]] = add i32 [[M10]], [[M01]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[ADDC]], 16
+; CHECK-NEXT: call void @use32(i32 [[SHL]])
+; CHECK-NEXT: [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
+; CHECK-NEXT: ret i32 [[RETLO]]
+;
+ %In0Lo = and i32 %in0, 65535
+ %In0Hi = lshr i32 %in0, 16
+ %In1Lo = and i32 %in1, 65535
+ %In1Hi = lshr i32 %in1, 16
+ %m10 = mul i32 %In1Hi, %In0Lo
+ %m01 = mul i32 %In1Lo, %In0Hi
+ %m00 = mul i32 %In1Lo, %In0Lo
+ %addc = add i32 %m10, %m01
+ %shl = shl i32 %addc, 16
+ call void @use32(i32 %shl)
+ %retLo = add i32 %shl, %m00
+ ret i32 %retLo
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/InstCombine/mul_full_64.ll b/llvm/test/Transforms/InstCombine/mul_full_64.ll
index eb652f3f8a1d0..7cddb63b9ba63 100644
--- a/llvm/test/Transforms/InstCombine/mul_full_64.ll
+++ b/llvm/test/Transforms/InstCombine/mul_full_64.ll
@@ -177,6 +177,7 @@ define i64 @mul_full_64_variant2(i64 %a, i64 %b, ptr nocapture %rhi) {
ret i64 %add27
}
+; Negative test case for mul_fold function: MUL7 is used in more than one place
define i64 @mul_full_64_variant3(i64 %a, i64 %b, ptr nocapture %rhi) {
; CHECK-LABEL: @mul_full_64_variant3(
; CHECK-NEXT: [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
@@ -196,7 +197,9 @@ define i64 @mul_full_64_variant3(i64 %a, i64 %b, ptr nocapture %rhi) {
; CHECK-NEXT: [[SHR_I:%.*]] = lshr i64 [[ADD15]], 32
; CHECK-NEXT: [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I]]
; CHECK-NEXT: store i64 [[ADD17]], ptr [[RHI:%.*]], align 8
-; CHECK-NEXT: [[ADD19:%.*]] = mul i64 [[A]], [[B]]
+; CHECK-NEXT: [[ADD18:%.*]] = add i64 [[MUL6]], [[MUL5]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[ADD18]], 32
+; CHECK-NEXT: [[ADD19:%.*]] = add i64 [[SHL]], [[MUL7]]
; CHECK-NEXT: ret i64 [[ADD19]]
;
%conv = and i64 %a, 4294967295
More information about the llvm-commits
mailing list