[llvm] 9a99a1a - [InstCombine] Add one-use limitation to box multiply fold (#72876)

Mon Dec 4 05:15:03 PST 2023

Author: shaojingzhi
Date: 2023-12-04T14:14:59+01:00
New Revision: 9a99a1a39e1d067abb9a6cc0d53e7708d6c49995

URL: https://github.com/llvm/llvm-project/commit/9a99a1a39e1d067abb9a6cc0d53e7708d6c49995
DIFF: https://github.com/llvm/llvm-project/commit/9a99a1a39e1d067abb9a6cc0d53e7708d6c49995.diff

LOG: [InstCombine] Add one-use limitation to box multiply fold (#72876)

Require that the multiply operand of I has only one use. If the multiply
has other uses, it cannot be deleted after the fold. Because a mul
instruction is far more expensive than the add and shl instructions in IR,
the fold would then add a multiplication instead of simplifying the code,
so in that case just return null.

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
    llvm/test/Transforms/InstCombine/mul_fold.ll
    llvm/test/Transforms/InstCombine/mul_full_64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3604abb8e5277..427558f309056 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1415,8 +1415,10 @@ static Instruction *foldBoxMultiply(BinaryOperator &I) {
   // ResLo = (CrossSum << HalfBits) + (YLo * XLo)
   Value *XLo, *YLo;
   Value *CrossSum;
+  // Require one-use on the multiply to avoid increasing the number of
+  // multiplications.
   if (!match(&I, m_c_Add(m_Shl(m_Value(CrossSum), m_SpecificInt(HalfBits)),
-                         m_Mul(m_Value(YLo), m_Value(XLo)))))
+                         m_OneUse(m_Mul(m_Value(YLo), m_Value(XLo))))))
     return nullptr;
 
   // XLo = X & HalfMask

diff  --git a/llvm/test/Transforms/InstCombine/mul_fold.ll b/llvm/test/Transforms/InstCombine/mul_fold.ll
index d20ac6070d108..a1fdec3c68cc4 100644
--- a/llvm/test/Transforms/InstCombine/mul_fold.ll
+++ b/llvm/test/Transforms/InstCombine/mul_fold.ll
@@ -712,3 +712,30 @@ define i8 @mul8_low_miss_half_width(i8 %in0, i8 %in1) {
   %retLo = add i8 %shl, %m00
   ret i8 %retLo
 }
+
+; Test case to show shl doesn't need hasOneUse constraint
+define i32 @mul32_low_extra_shl_use(i32 %in0, i32 %in1) {
+; CHECK-LABEL: @mul32_low_extra_shl_use(
+; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i32 [[IN0:%.*]], 16
+; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i32 [[IN1:%.*]], 16
+; CHECK-NEXT:    [[M10:%.*]] = mul i32 [[IN1HI]], [[IN0]]
+; CHECK-NEXT:    [[M01:%.*]] = mul i32 [[IN0HI]], [[IN1]]
+; CHECK-NEXT:    [[ADDC:%.*]] = add i32 [[M10]], [[M01]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[ADDC]], 16
+; CHECK-NEXT:    call void @use32(i32 [[SHL]])
+; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
+; CHECK-NEXT:    ret i32 [[RETLO]]
+;
+  %In0Lo = and i32 %in0, 65535
+  %In0Hi = lshr i32 %in0, 16
+  %In1Lo = and i32 %in1, 65535
+  %In1Hi = lshr i32 %in1, 16
+  %m10 = mul i32 %In1Hi, %In0Lo
+  %m01 = mul i32 %In1Lo, %In0Hi
+  %m00 = mul i32 %In1Lo, %In0Lo
+  %addc = add i32 %m10, %m01
+  %shl = shl i32 %addc, 16
+  call void @use32(i32 %shl)
+  %retLo = add i32 %shl, %m00
+  ret i32 %retLo
+}
\ No newline at end of file

diff  --git a/llvm/test/Transforms/InstCombine/mul_full_64.ll b/llvm/test/Transforms/InstCombine/mul_full_64.ll
index eb652f3f8a1d0..7cddb63b9ba63 100644
--- a/llvm/test/Transforms/InstCombine/mul_full_64.ll
+++ b/llvm/test/Transforms/InstCombine/mul_full_64.ll
@@ -177,6 +177,7 @@ define i64 @mul_full_64_variant2(i64 %a, i64 %b, ptr nocapture %rhi) {
   ret i64 %add27
 }
 
+; Negative test case for mul_fold function: MUL7 is used in more than one place
 define i64 @mul_full_64_variant3(i64 %a, i64 %b, ptr nocapture %rhi) {
 ; CHECK-LABEL: @mul_full_64_variant3(
 ; CHECK-NEXT:    [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
@@ -196,7 +197,9 @@ define i64 @mul_full_64_variant3(i64 %a, i64 %b, ptr nocapture %rhi) {
 ; CHECK-NEXT:    [[SHR_I:%.*]] = lshr i64 [[ADD15]], 32
 ; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I]]
 ; CHECK-NEXT:    store i64 [[ADD17]], ptr [[RHI:%.*]], align 8
-; CHECK-NEXT:    [[ADD19:%.*]] = mul i64 [[A]], [[B]]
+; CHECK-NEXT:    [[ADD18:%.*]] = add i64 [[MUL6]], [[MUL5]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[ADD18]], 32
+; CHECK-NEXT:    [[ADD19:%.*]] = add i64 [[SHL]], [[MUL7]]
 ; CHECK-NEXT:    ret i64 [[ADD19]]
 ;
   %conv = and i64 %a, 4294967295