[llvm] b0d8823 - [RISCV] Add isel pattern to optimize (mul (and X, 0xffffffff), (and Y, 0xffffffff)) on RV64

Sat Mar 20 14:56:01 PDT 2021

Author: Craig Topper
Date: 2021-03-20T14:55:46-07:00
New Revision: b0d8823a8a440549f303f9ba45aaa5550e1dc536

URL: https://github.com/llvm/llvm-project/commit/b0d8823a8a440549f303f9ba45aaa5550e1dc536
DIFF: https://github.com/llvm/llvm-project/commit/b0d8823a8a440549f303f9ba45aaa5550e1dc536.diff

LOG: [RISCV] Add isel pattern to optimize (mul (and X, 0xffffffff), (and Y, 0xffffffff)) on RV64

This patterns computes the full 64 bit product of a 32x32 unsigned
multiply. This requires a two pairs of SLLI+SRLI to zero the
upper 32 bits of the inputs.

We can do better than this by using two SLLI to move the lower
bits to the upper bits then use MULHU to compute the product. This
is the high half of a full 64x64 product. Since we put 32 0s in the lower
bits of the inputs we know the 128-bit product will have zeros in the
lower 64 bits. So the upper 64 bits, which MULHU computes, will contain
the original 64 bit product we were after.

The same trick would work for (mul (sext_inreg X, i32), (sext_inreg Y, i32))
using MULHS, but sext_inreg is sext.w which is already one instruction so we
wouldn't save anything.

Differential Revision: https://reviews.llvm.org/D99026

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVInstrInfoM.td
    llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
    llvm/test/CodeGen/RISCV/xaluo.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index 2bfdc9312ebb..d38b5a98b31c 100644

--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -93,4 +93,13 @@ def : Pat<(and (riscv_remuw (assertzexti32 GPR:$rs1),
 // produce a result where res[63:32]=0 and res[31]=1.
 def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))),
           (REMW GPR:$rs1, GPR:$rs2)>;
+
+// Special case for calculating the full 64-bit product of a 32x32 unsigned
+// multiply where the inputs aren't known to be zero extended. We can shift the
+// inputs left by 32 and use a MULHU. This saves two SRLIs needed to finish
+// zeroing the upper 32 bits.
+// TODO: If one of the operands is zero extended and the other isn't, we might
+// still be better off shifting both left by 32.
+def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
+          (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
 } // Predicates = [HasStdExtM, IsRV64]

diff  --git a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
index c4a4de7681b0..682f351478ed 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
@@ -10,13 +10,11 @@ define signext i32 @addw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin
 ; CHECK-NEXT:    add a2, a2, a1
 ; CHECK-NEXT:    addi a3, a0, 1
 ; CHECK-NEXT:    mul a3, a2, a3
-; CHECK-NEXT:    slli a2, a2, 32
-; CHECK-NEXT:    srli a2, a2, 32
 ; CHECK-NEXT:    sub a1, a1, a0
 ; CHECK-NEXT:    addi a1, a1, -2
 ; CHECK-NEXT:    slli a1, a1, 32
-; CHECK-NEXT:    srli a1, a1, 32
-; CHECK-NEXT:    mul a1, a2, a1
+; CHECK-NEXT:    slli a2, a2, 32
+; CHECK-NEXT:    mulhu a1, a2, a1
 ; CHECK-NEXT:    srli a1, a1, 1
 ; CHECK-NEXT:    add a0, a3, a0
 ; CHECK-NEXT:    addw a0, a0, a1
@@ -57,13 +55,11 @@ define signext i32 @subw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin
 ; CHECK-NEXT:    not a2, a0
 ; CHECK-NEXT:    add a3, a2, a1
 ; CHECK-NEXT:    mul a2, a3, a2
-; CHECK-NEXT:    slli a3, a3, 32
-; CHECK-NEXT:    srli a3, a3, 32
 ; CHECK-NEXT:    sub a1, a1, a0
 ; CHECK-NEXT:    addi a1, a1, -2
 ; CHECK-NEXT:    slli a1, a1, 32
-; CHECK-NEXT:    srli a1, a1, 32
-; CHECK-NEXT:    mul a1, a3, a1
+; CHECK-NEXT:    slli a3, a3, 32
+; CHECK-NEXT:    mulhu a1, a3, a1
 ; CHECK-NEXT:    srli a1, a1, 1
 ; CHECK-NEXT:    sub a0, a2, a0
 ; CHECK-NEXT:    subw a0, a0, a1

diff  --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index e29cfc9156bc..facc0f2914b1 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -556,10 +556,8 @@ define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
 ; RV64-LABEL: umulo.i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    mul a1, a0, a1
+; RV64-NEXT:    mulhu a1, a0, a1
 ; RV64-NEXT:    srli a0, a1, 32
 ; RV64-NEXT:    snez a0, a0
 ; RV64-NEXT:    sw a1, 0(a2)
@@ -1297,10 +1295,8 @@ define i32 @umulo.select.i32(i32 %v1, i32 %v2) {
 ; RV64-LABEL: umulo.select.i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    srli a2, a2, 32
 ; RV64-NEXT:    slli a3, a0, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    mul a2, a3, a2
+; RV64-NEXT:    mulhu a2, a3, a2
 ; RV64-NEXT:    srli a2, a2, 32
 ; RV64-NEXT:    bnez a2, .LBB42_2
 ; RV64-NEXT:  # %bb.1: # %entry
@@ -1324,10 +1320,8 @@ define i1 @umulo.not.i32(i32 %v1, i32 %v2) {
 ; RV64-LABEL: umulo.not.i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    mulhu a0, a0, a1
 ; RV64-NEXT:    srli a0, a0, 32
 ; RV64-NEXT:    seqz a0, a0
 ; RV64-NEXT:    ret
@@ -1893,10 +1887,8 @@ define zeroext i1 @umulo.br.i32(i32 %v1, i32 %v2) {
 ; RV64-LABEL: umulo.br.i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    mulhu a0, a0, a1
 ; RV64-NEXT:    srli a0, a0, 32
 ; RV64-NEXT:    beqz a0, .LBB57_2
 ; RV64-NEXT:  # %bb.1: # %overflow