[PATCH] D113805: [RISCV] Improve codegen for i32 udiv/urem by constant on RV64.
Craig Topper via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 12 14:45:23 PST 2021
craig.topper updated this revision to Diff 386959.
craig.topper added a comment.
Remove TODO
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D113805/new/
https://reviews.llvm.org/D113805
Files:
llvm/lib/Target/RISCV/RISCVInstrInfoM.td
llvm/test/CodeGen/RISCV/div.ll
llvm/test/CodeGen/RISCV/urem-lkk.ll
Index: llvm/test/CodeGen/RISCV/urem-lkk.ll
===================================================================
--- llvm/test/CodeGen/RISCV/urem-lkk.ll
+++ llvm/test/CodeGen/RISCV/urem-lkk.ll
@@ -103,11 +103,10 @@
; RV64IM-LABEL: fold_urem_positive_even:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 32
-; RV64IM-NEXT: srli a1, a1, 32
-; RV64IM-NEXT: lui a2, 253241
-; RV64IM-NEXT: slli a2, a2, 2
-; RV64IM-NEXT: addi a2, a2, -61
-; RV64IM-NEXT: mul a1, a1, a2
+; RV64IM-NEXT: lui a2, 1012964
+; RV64IM-NEXT: addiw a2, a2, -61
+; RV64IM-NEXT: slli a2, a2, 32
+; RV64IM-NEXT: mulhu a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 42
; RV64IM-NEXT: addi a2, zero, 1060
; RV64IM-NEXT: mulw a1, a1, a2
Index: llvm/test/CodeGen/RISCV/div.ll
===================================================================
--- llvm/test/CodeGen/RISCV/div.ll
+++ llvm/test/CodeGen/RISCV/div.ll
@@ -78,12 +78,10 @@
; RV64IM-LABEL: udiv_constant:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a0, a0, 32
-; RV64IM-NEXT: srli a0, a0, 32
-; RV64IM-NEXT: lui a1, 205
+; RV64IM-NEXT: lui a1, 838861
; RV64IM-NEXT: addiw a1, a1, -819
-; RV64IM-NEXT: slli a1, a1, 12
-; RV64IM-NEXT: addi a1, a1, -819
-; RV64IM-NEXT: mul a0, a0, a1
+; RV64IM-NEXT: slli a1, a1, 32
+; RV64IM-NEXT: mulhu a0, a0, a1
; RV64IM-NEXT: srli a0, a0, 34
; RV64IM-NEXT: ret
%1 = udiv i32 %a, 5
Index: llvm/lib/Target/RISCV/RISCVInstrInfoM.td
===================================================================
--- llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -96,13 +96,24 @@
(REMW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtM, IsRV64]
+// Pattern to detect constants with no more than 32 active bits that can't
+// be materialized with lui+addiw (i.e. bit 31 is set, so the value is a
+// uimm32 but not a simm32).
+def uimm32_not_simm32 : PatLeaf<(XLenVT GPR:$a), [{
+ auto *C = dyn_cast<ConstantSDNode>(N);
+ return C && C->hasOneUse() && isUInt<32>(C->getZExtValue()) &&
+ !isInt<32>(C->getSExtValue());
+}]>;
+
let Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] in {
// Special case for calculating the full 64-bit product of a 32x32 unsigned
// multiply where the inputs aren't known to be zero extended. We can shift the
// inputs left by 32 and use a MULHU. This saves two SRLIs needed to finish
// zeroing the upper 32 bits.
-// TODO: If one of the operands is zero extended and the other isn't, we might
-// still be better off shifting both left by 32.
def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
(MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
+// The RHS could also be a constant that is hard to materialize. By shifting
+// the constant left by 32 we can allow constant materialization to use
+// LUI+ADDIW via hasAllWUsers.
+def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), uimm32_not_simm32:$rs2)),
+ (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
} // Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba]
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D113805.386959.patch
Type: text/x-patch
Size: 3011 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211112/83fed0f1/attachment.bin>
More information about the llvm-commits
mailing list