[llvm] Use div.w/mod.w to eliminate unnecessary sign-extend for sdiv/srem i32. (PR #117298)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 21 23:07:25 PST 2024
https://github.com/tangaac created https://github.com/llvm/llvm-project/pull/117298
(No description was provided for this pull request.)
From 39b432cc711d258b30da615a5eafcffd2796e88a Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Fri, 22 Nov 2024 12:50:20 +0800
Subject: [PATCH] Use div.w/mod.w to eliminate unnecessary sign-extend
instructions for sdiv/srem i32.
---
.../LoongArch/LoongArchISelLowering.cpp | 11 ++++++++-
.../Target/LoongArch/LoongArchISelLowering.h | 2 ++
.../Target/LoongArch/LoongArchInstrInfo.td | 4 ++++
.../ir-instruction/sdiv-udiv-srem-urem.ll | 24 +++++++++----------
4 files changed, 28 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 5c567ed4a6f724..a202d9e9b5b50c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -141,7 +141,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
setOperationAction(ISD::BSWAP, MVT::i32, Custom);
- setOperationAction({ISD::UDIV, ISD::UREM}, MVT::i32, Custom);
+ setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
+ Custom);
setOperationAction(ISD::LROUND, MVT::i32, Custom);
}
@@ -2629,8 +2630,12 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("Unexpected opcode");
+ case ISD::SDIV:
+ return LoongArchISD::DIV_W;
case ISD::UDIV:
return LoongArchISD::DIV_WU;
+ case ISD::SREM:
+ return LoongArchISD::MOD_W;
case ISD::UREM:
return LoongArchISD::MOD_WU;
case ISD::SHL:
@@ -2827,7 +2832,9 @@ void LoongArchTargetLowering::ReplaceNodeResults(
"Unexpected custom legalisation");
Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
break;
+ case ISD::SDIV:
case ISD::UDIV:
+ case ISD::SREM:
case ISD::UREM:
assert(VT == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
@@ -4667,7 +4674,9 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BITREV_W)
NODE_NAME_CASE(ROTR_W)
NODE_NAME_CASE(ROTL_W)
+ NODE_NAME_CASE(DIV_W)
NODE_NAME_CASE(DIV_WU)
+ NODE_NAME_CASE(MOD_W)
NODE_NAME_CASE(MOD_WU)
NODE_NAME_CASE(CLZ_W)
NODE_NAME_CASE(CTZ_W)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 1aa686695b49b8..5a47dfb257175f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -44,6 +44,8 @@ enum NodeType : unsigned {
ROTR_W,
// unsigned 32-bit integer division
+ DIV_W,
+ MOD_W,
DIV_WU,
MOD_WU,
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index cd1500229f4aa9..b9d4f00717a565 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -85,7 +85,9 @@ def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>;
+def loongarch_div_w : SDNode<"LoongArchISD::DIV_W", SDT_LoongArchIntBinOpW>;
def loongarch_div_wu : SDNode<"LoongArchISD::DIV_WU", SDT_LoongArchIntBinOpW>;
+def loongarch_mod_w : SDNode<"LoongArchISD::MOD_W", SDT_LoongArchIntBinOpW>;
def loongarch_mod_wu : SDNode<"LoongArchISD::MOD_WU", SDT_LoongArchIntBinOpW>;
def loongarch_crc_w_b_w
: SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
@@ -1156,10 +1158,12 @@ def : PatGprGpr<sub, SUB_D>;
def : PatGprGpr<sdiv, DIV_D>;
def : PatGprGpr_32<sdiv, DIV_W>;
def : PatGprGpr<udiv, DIV_DU>;
+def : PatGprGpr<loongarch_div_w, DIV_W>;
def : PatGprGpr<loongarch_div_wu, DIV_WU>;
def : PatGprGpr<srem, MOD_D>;
def : PatGprGpr_32<srem, MOD_W>;
def : PatGprGpr<urem, MOD_DU>;
+def : PatGprGpr<loongarch_mod_w, MOD_W>;
def : PatGprGpr<loongarch_mod_wu, MOD_WU>;
def : PatGprGpr<shiftop<rotr>, ROTR_D>;
def : PatGprGpr<shiftopw<loongarch_rotr_w>, ROTR_W>;
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index c5af79157eaadc..99824f6d7718e7 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -121,7 +121,7 @@ define i32 @sdiv_i32(i32 %a, i32 %b) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: addi.w $a0, $a0, 0
-; LA64-NEXT: div.d $a0, $a0, $a1
+; LA64-NEXT: div.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: sdiv_i32:
@@ -137,7 +137,7 @@ define i32 @sdiv_i32(i32 %a, i32 %b) {
; LA64-TRAP: # %bb.0: # %entry
; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: div.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB3_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
@@ -156,7 +156,7 @@ define i32 @sdiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-LABEL: sdiv_ui32_si32_si32:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: div.d $a0, $a0, $a1
+; LA64-NEXT: div.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: sdiv_ui32_si32_si32:
@@ -170,7 +170,7 @@ define i32 @sdiv_ui32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-TRAP-LABEL: sdiv_ui32_si32_si32:
; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: div.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB4_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
@@ -693,7 +693,7 @@ define i32 @srem_i32(i32 %a, i32 %b) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: addi.w $a0, $a0, 0
-; LA64-NEXT: mod.d $a0, $a0, $a1
+; LA64-NEXT: mod.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: srem_i32:
@@ -709,7 +709,7 @@ define i32 @srem_i32(i32 %a, i32 %b) {
; LA64-TRAP: # %bb.0: # %entry
; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: mod.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB19_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
@@ -728,7 +728,7 @@ define i32 @srem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-LABEL: srem_ui32_si32_si32:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: mod.d $a0, $a0, $a1
+; LA64-NEXT: mod.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: srem_ui32_si32_si32:
@@ -742,7 +742,7 @@ define i32 @srem_ui32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-TRAP-LABEL: srem_ui32_si32_si32:
; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: mod.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB20_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
@@ -763,7 +763,7 @@ define signext i32 @srem_si32_ui32_ui32(i32 %a, i32 %b) {
; LA64: # %bb.0: # %entry
; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: addi.w $a0, $a0, 0
-; LA64-NEXT: mod.d $a0, $a0, $a1
+; LA64-NEXT: mod.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: srem_si32_ui32_ui32:
@@ -779,7 +779,7 @@ define signext i32 @srem_si32_ui32_ui32(i32 %a, i32 %b) {
; LA64-TRAP: # %bb.0: # %entry
; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: mod.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB21_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
@@ -798,7 +798,7 @@ define signext i32 @srem_si32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-LABEL: srem_si32_si32_si32:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: mod.d $a0, $a0, $a1
+; LA64-NEXT: mod.w $a0, $a0, $a1
; LA64-NEXT: ret
;
; LA32-TRAP-LABEL: srem_si32_si32_si32:
@@ -812,7 +812,7 @@ define signext i32 @srem_si32_si32_si32(i32 signext %a, i32 signext %b) {
;
; LA64-TRAP-LABEL: srem_si32_si32_si32:
; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1
+; LA64-TRAP-NEXT: mod.w $a0, $a0, $a1
; LA64-TRAP-NEXT: bnez $a1, .LBB22_2
; LA64-TRAP-NEXT: # %bb.1: # %entry
; LA64-TRAP-NEXT: break 7
More information about the llvm-commits mailing list