[llvm-branch-commits] [llvm] 22d7bee - [PPCISelLowering] Avoid emitting calls to __multi3, __muloti4

Tom Stellard via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Apr 5 21:45:12 PDT 2022


Author: Aaron Puchert
Date: 2022-04-05T21:43:08-07:00
New Revision: 22d7bee01a5af14e16e3ecc610e8e50e072385bc

URL: https://github.com/llvm/llvm-project/commit/22d7bee01a5af14e16e3ecc610e8e50e072385bc
DIFF: https://github.com/llvm/llvm-project/commit/22d7bee01a5af14e16e3ecc610e8e50e072385bc.diff

LOG: [PPCISelLowering] Avoid emitting calls to __multi3, __muloti4

After D108936, @llvm.smul.with.overflow.i64 was lowered to a call to
__multi3 instead of __mulodi4. __multi3 is also unavailable on 32-bit
PowerPC, not even with compiler-rt. Block it as well so that we get
inline code.

Because libgcc doesn't have __muloti4, we block that as well, on both
32-bit and 64-bit PowerPC.

Fixes #54460.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D122090

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/overflow-intrinsic-optimizations.ll
    llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
    llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cbeae0ab03b83..6c9d43ad8c03f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1305,11 +1305,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
   }
 
+  setLibcallName(RTLIB::MULO_I128, nullptr);
   if (!isPPC64) {
     // These libcalls are not available in 32-bit.
     setLibcallName(RTLIB::SHL_I128, nullptr);
     setLibcallName(RTLIB::SRL_I128, nullptr);
     setLibcallName(RTLIB::SRA_I128, nullptr);
+    setLibcallName(RTLIB::MUL_I128, nullptr);
     setLibcallName(RTLIB::MULO_I64, nullptr);
   }
 

diff  --git a/llvm/test/CodeGen/PowerPC/overflow-intrinsic-optimizations.ll b/llvm/test/CodeGen/PowerPC/overflow-intrinsic-optimizations.ll
index 83e9d8ee627e4..353dac3d2c951 100644
--- a/llvm/test/CodeGen/PowerPC/overflow-intrinsic-optimizations.ll
+++ b/llvm/test/CodeGen/PowerPC/overflow-intrinsic-optimizations.ll
@@ -1,8 +1,10 @@
 ; RUN: llc %s -mtriple=powerpc -o - | FileCheck %s
+; RUN: llc %s -mtriple=powerpc64 -o - | FileCheck %s
 
 define i1 @no__mulodi4(i32 %a, i64 %b, i32* %c) {
 ; CHECK-LABEL: no__mulodi4
 ; CHECK-NOT: bl __mulodi4
+; CHECK-NOT: bl __multi3
 entry:
   %0 = sext i32 %a to i64
   %1 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %0, i64 %b)
@@ -16,4 +18,14 @@ entry:
   ret i1 %7
 }
 
+define i1 @no__muloti4(i128 %a, i128 %b) {
+; CHECK-LABEL: no__muloti4
+; CHECK-NOT: bl __muloti4
+entry:
+  %0 = call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %a, i128 %b)
+  %1 = extractvalue { i128, i1 } %0, 1
+  ret i1 %1
+}
+
 declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64)
+declare { i128, i1 } @llvm.smul.with.overflow.i128(i128, i128)

diff  --git a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
index 335946cb24de2..778edba2719a6 100644
--- a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
@@ -32,110 +32,103 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ;
 ; PPC32-LABEL: muloti_test:
 ; PPC32:       # %bb.0: # %start
-; PPC32-NEXT:    mflr 0
-; PPC32-NEXT:    stw 0, 4(1)
 ; PPC32-NEXT:    stwu 1, -64(1)
-; PPC32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
-; PPC32-NEXT:    mfcr 12
 ; PPC32-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
-; PPC32-NEXT:    mr 26, 7
-; PPC32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
-; PPC32-NEXT:    mr 28, 4
-; PPC32-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
-; PPC32-NEXT:    mr 29, 8
-; PPC32-NEXT:    mr 24, 3
-; PPC32-NEXT:    li 3, 0
-; PPC32-NEXT:    li 4, 0
-; PPC32-NEXT:    li 7, 0
-; PPC32-NEXT:    li 8, 0
+; PPC32-NEXT:    mulhwu. 26, 7, 6
+; PPC32-NEXT:    mcrf 1, 0
+; PPC32-NEXT:    mfcr 12
+; PPC32-NEXT:    cmpwi 7, 5, 0
+; PPC32-NEXT:    cmpwi 2, 7, 0
+; PPC32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
+; PPC32-NEXT:    mulhwu. 26, 5, 8
+; PPC32-NEXT:    mcrf 5, 0
 ; PPC32-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
-; PPC32-NEXT:    mr 23, 5
+; PPC32-NEXT:    crnor 28, 30, 10
+; PPC32-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
+; PPC32-NEXT:    cmpwi 2, 9, 0
+; PPC32-NEXT:    mulhwu. 26, 3, 10
+; PPC32-NEXT:    mcrf 6, 0
+; PPC32-NEXT:    cmpwi 3, 3, 0
+; PPC32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
+; PPC32-NEXT:    crnor 29, 10, 14
 ; PPC32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
-; PPC32-NEXT:    mr 25, 9
+; PPC32-NEXT:    mulhwu. 26, 9, 4
 ; PPC32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
-; PPC32-NEXT:    mr 27, 6
+; PPC32-NEXT:    crorc 28, 28, 6
+; PPC32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT:    crorc 20, 28, 22
 ; PPC32-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
-; PPC32-NEXT:    mr 30, 10
-; PPC32-NEXT:    stw 12, 24(1)
-; PPC32-NEXT:    bl __multi3
-; PPC32-NEXT:    mulhwu. 9, 26, 27
-; PPC32-NEXT:    mfcr 9 # cr0
-; PPC32-NEXT:    cmpwi 2, 26, 0
-; PPC32-NEXT:    stw 9, 20(1)
-; PPC32-NEXT:    cmpwi 3, 23, 0
-; PPC32-NEXT:    crnor 12, 14, 10
-; PPC32-NEXT:    cmpwi 4, 24, 0
-; PPC32-NEXT:    mulhwu. 9, 23, 29
-; PPC32-NEXT:    mcrf 5, 0
-; PPC32-NEXT:    cmpwi 1, 25, 0
-; PPC32-NEXT:    crnor 4, 6, 18
-; PPC32-NEXT:    mulhwu. 9, 24, 30
-; PPC32-NEXT:    mcrf 6, 0
-; PPC32-NEXT:    mulhwu. 0, 25, 28
-; PPC32-NEXT:    mcrf 7, 0
-; PPC32-NEXT:    or. 0, 28, 24
-; PPC32-NEXT:    mcrf 2, 0
-; PPC32-NEXT:    or. 0, 29, 26
-; PPC32-NEXT:    crnor 5, 2, 10
-; PPC32-NEXT:    mullw 10, 26, 27
-; PPC32-NEXT:    lwz 26, 20(1)
-; PPC32-NEXT:    mullw 9, 23, 29
-; PPC32-NEXT:    add 9, 10, 9
-; PPC32-NEXT:    mtcrf 128, 26 # cr0
-; PPC32-NEXT:    crorc 6, 12, 2
-; PPC32-NEXT:    crorc 20, 6, 22
-; PPC32-NEXT:    mulhwu 7, 29, 27
-; PPC32-NEXT:    add 9, 7, 9
-; PPC32-NEXT:    cmplw 9, 7
-; PPC32-NEXT:    crorc 21, 4, 26
-; PPC32-NEXT:    cror 20, 20, 0
-; PPC32-NEXT:    crorc 21, 21, 30
-; PPC32-NEXT:    mullw 11, 25, 28
-; PPC32-NEXT:    mullw 12, 24, 30
-; PPC32-NEXT:    add 10, 12, 11
-; PPC32-NEXT:    lwz 12, 24(1)
-; PPC32-NEXT:    lwz 26, 40(1) # 4-byte Folded Reload
-; PPC32-NEXT:    mulhwu 8, 28, 30
-; PPC32-NEXT:    add 10, 8, 10
-; PPC32-NEXT:    cmplw 10, 8
-; PPC32-NEXT:    cror 21, 21, 0
-; PPC32-NEXT:    cror 21, 5, 21
+; PPC32-NEXT:    mulhwu 0, 6, 10
+; PPC32-NEXT:    stw 12, 20(1)
+; PPC32-NEXT:    crorc 21, 29, 26
+; PPC32-NEXT:    crorc 21, 21, 2
+; PPC32-NEXT:    li 11, 0
+; PPC32-NEXT:    mullw 26, 5, 10
+; PPC32-NEXT:    addc 0, 26, 0
+; PPC32-NEXT:    mulhwu 29, 5, 10
+; PPC32-NEXT:    addze 29, 29
+; PPC32-NEXT:    mullw 23, 5, 8
+; PPC32-NEXT:    mullw 22, 7, 6
+; PPC32-NEXT:    mulhwu 30, 6, 9
+; PPC32-NEXT:    mulhwu 12, 5, 9
+; PPC32-NEXT:    mulhwu 28, 8, 6
+; PPC32-NEXT:    mullw 25, 6, 9
+; PPC32-NEXT:    mullw 24, 5, 9
+; PPC32-NEXT:    mullw 5, 9, 4
+; PPC32-NEXT:    add 9, 22, 23
+; PPC32-NEXT:    add 9, 28, 9
+; PPC32-NEXT:    cmplw 1, 9, 28
+; PPC32-NEXT:    cror 20, 20, 4
+; PPC32-NEXT:    mullw 23, 3, 10
+; PPC32-NEXT:    add 26, 23, 5
+; PPC32-NEXT:    addc 5, 25, 0
+; PPC32-NEXT:    addze 30, 30
+; PPC32-NEXT:    or. 3, 4, 3
+; PPC32-NEXT:    mulhwu 27, 4, 10
+; PPC32-NEXT:    mcrf 1, 0
+; PPC32-NEXT:    addc 3, 29, 30
+; PPC32-NEXT:    add 26, 27, 26
+; PPC32-NEXT:    cmplw 6, 26, 27
+; PPC32-NEXT:    cror 21, 21, 24
+; PPC32-NEXT:    mullw 0, 4, 10
+; PPC32-NEXT:    or. 4, 8, 7
+; PPC32-NEXT:    addze 4, 11
+; PPC32-NEXT:    addc 7, 24, 3
+; PPC32-NEXT:    crnor 22, 2, 6
+; PPC32-NEXT:    mullw 28, 8, 6
+; PPC32-NEXT:    adde 8, 12, 4
+; PPC32-NEXT:    addc 3, 0, 28
+; PPC32-NEXT:    adde 9, 26, 9
+; PPC32-NEXT:    addc 4, 7, 3
+; PPC32-NEXT:    adde 3, 8, 9
+; PPC32-NEXT:    cror 21, 22, 21
+; PPC32-NEXT:    cmplw   4, 7
+; PPC32-NEXT:    cmplw 1, 3, 8
+; PPC32-NEXT:    lwz 12, 20(1)
 ; PPC32-NEXT:    cror 20, 21, 20
-; PPC32-NEXT:    mullw 0, 29, 27
+; PPC32-NEXT:    crandc 21, 4, 6
+; PPC32-NEXT:    crand 22, 6, 0
+; PPC32-NEXT:    cror 21, 22, 21
+; PPC32-NEXT:    crnor 20, 20, 21
+; PPC32-NEXT:    li 7, 1
+; PPC32-NEXT:    mullw 6, 6, 10
+; PPC32-NEXT:    bc 12, 20, .LBB0_1
+; PPC32-NEXT:    b .LBB0_2
+; PPC32-NEXT:  .LBB0_1: # %start
+; PPC32-NEXT:    li      7, 0
+; PPC32-NEXT:  .LBB0_2: # %start
 ; PPC32-NEXT:    mtcrf 32, 12 # cr2
 ; PPC32-NEXT:    mtcrf 16, 12 # cr3
-; PPC32-NEXT:    mtcrf 8, 12 # cr4
-; PPC32-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
-; PPC32-NEXT:    mullw 7, 28, 30
-; PPC32-NEXT:    addc 7, 7, 0
-; PPC32-NEXT:    adde 11, 10, 9
-; PPC32-NEXT:    addc 9, 4, 7
-; PPC32-NEXT:    adde 8, 3, 11
-; PPC32-NEXT:    cmplw 6, 9, 4
-; PPC32-NEXT:    cmplw 8, 3
-; PPC32-NEXT:    crand 22, 2, 24
-; PPC32-NEXT:    crandc 23, 0, 2
-; PPC32-NEXT:    cror 22, 22, 23
-; PPC32-NEXT:    crnor 20, 20, 22
-; PPC32-NEXT:    li 3, 1
-; PPC32-NEXT:    bc 12, 20, .LBB0_2
-; PPC32-NEXT:  # %bb.1: # %start
-; PPC32-NEXT:    ori 7, 3, 0
-; PPC32-NEXT:    b .LBB0_3
-; PPC32-NEXT:  .LBB0_2: # %start
-; PPC32-NEXT:    li 7, 0
-; PPC32-NEXT:  .LBB0_3: # %start
-; PPC32-NEXT:    mr 3, 8
-; PPC32-NEXT:    mr 4, 9
 ; PPC32-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 28, 48(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 27, 44(1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz 26, 40(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 25, 36(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 24, 32(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 23, 28(1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz 0, 68(1)
+; PPC32-NEXT:    lwz 22, 24(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    addi 1, 1, 64
-; PPC32-NEXT:    mtlr 0
 ; PPC32-NEXT:    blr
 start:
   %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2

diff  --git a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
index ef73fa686b3c6..65ea57eaf3b9f 100644
--- a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll
@@ -208,43 +208,46 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
 define i1 @test_urem_oversized(i66 %X) nounwind {
 ; PPC-LABEL: test_urem_oversized:
 ; PPC:       # %bb.0:
-; PPC-NEXT:    mflr 0
-; PPC-NEXT:    stw 0, 4(1)
-; PPC-NEXT:    stwu 1, -16(1)
-; PPC-NEXT:    mr 6, 5
-; PPC-NEXT:    mr 5, 4
-; PPC-NEXT:    mr 4, 3
-; PPC-NEXT:    lis 3, 12057
-; PPC-NEXT:    lis 7, -12795
-; PPC-NEXT:    ori 9, 3, 37186
-; PPC-NEXT:    ori 10, 7, 40665
-; PPC-NEXT:    li 3, 0
-; PPC-NEXT:    li 7, 0
-; PPC-NEXT:    li 8, 2
-; PPC-NEXT:    bl __multi3
-; PPC-NEXT:    rotlwi 7, 6, 31
-; PPC-NEXT:    lis 3, -5526
-; PPC-NEXT:    rlwimi 7, 5, 31, 0, 0
-; PPC-NEXT:    rotlwi 5, 5, 31
-; PPC-NEXT:    rlwimi 5, 4, 31, 0, 0
-; PPC-NEXT:    ori 3, 3, 61135
-; PPC-NEXT:    cmplwi 1, 5, 13
-; PPC-NEXT:    cmplw 7, 3
-; PPC-NEXT:    rlwinm 4, 4, 31, 31, 31
+; PPC-NEXT:    lis 6, -12795
+; PPC-NEXT:    ori 6, 6, 40665
+; PPC-NEXT:    mulhwu 7, 5, 6
+; PPC-NEXT:    lis 9, 12057
+; PPC-NEXT:    ori 9, 9, 37186
+; PPC-NEXT:    mullw 11, 4, 6
+; PPC-NEXT:    addc 7, 11, 7
+; PPC-NEXT:    lis 11, -5526
+; PPC-NEXT:    ori 11, 11, 61135
+; PPC-NEXT:    mulhwu 8, 4, 6
+; PPC-NEXT:    addze 8, 8
+; PPC-NEXT:    mulhwu 10, 5, 9
+; PPC-NEXT:    mullw 4, 4, 9
+; PPC-NEXT:    mullw 9, 5, 9
+; PPC-NEXT:    addc 7, 9, 7
+; PPC-NEXT:    addze 9, 10
+; PPC-NEXT:    rotlwi 10, 7, 31
+; PPC-NEXT:    mullw 3, 3, 6
+; PPC-NEXT:    mullw 6, 5, 6
+; PPC-NEXT:    slwi 5, 5, 1
+; PPC-NEXT:    add 3, 5, 3
+; PPC-NEXT:    rotlwi 5, 6, 31
+; PPC-NEXT:    rlwimi 5, 7, 31, 0, 0
+; PPC-NEXT:    add 7, 8, 9
+; PPC-NEXT:    add 4, 4, 7
+; PPC-NEXT:    add 3, 4, 3
+; PPC-NEXT:    rlwimi 10, 3, 31, 0, 0
+; PPC-NEXT:    cmplw 5, 11
+; PPC-NEXT:    cmplwi 1, 10, 13
+; PPC-NEXT:    rlwinm 3, 3, 31, 31, 31
 ; PPC-NEXT:    crand 20, 6, 0
 ; PPC-NEXT:    crandc 21, 4, 6
-; PPC-NEXT:    rlwimi. 4, 6, 1, 30, 30
+; PPC-NEXT:    rlwimi. 3, 6, 1, 30, 30
 ; PPC-NEXT:    cror 20, 20, 21
 ; PPC-NEXT:    crnand 20, 2, 20
 ; PPC-NEXT:    li 3, 1
 ; PPC-NEXT:    bc 12, 20, .LBB5_1
-; PPC-NEXT:    b .LBB5_2
+; PPC-NEXT:    blr
 ; PPC-NEXT:  .LBB5_1:
 ; PPC-NEXT:    li 3, 0
-; PPC-NEXT:  .LBB5_2:
-; PPC-NEXT:    lwz 0, 20(1)
-; PPC-NEXT:    addi 1, 1, 16
-; PPC-NEXT:    mtlr 0
 ; PPC-NEXT:    blr
 ;
 ; PPC64LE-LABEL: test_urem_oversized:


        


More information about the llvm-branch-commits mailing list