[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)

Tobias Hieta via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Sep 30 23:51:35 PDT 2024


https://github.com/tru updated https://github.com/llvm/llvm-project/pull/109125

>From a7554dfc222b13624426ebd6ef46e122b9c16ee7 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 10 Sep 2024 09:19:39 +0800
Subject: [PATCH 1/2]  [LoongArch][ISel] Check the number of sign bits in
 `PatGprGpr_32` (#107432)

After https://github.com/llvm/llvm-project/pull/92205, LoongArch ISel
selects `div.w` for `trunc i64 (sdiv i64 3202030857, (sext i32 X to
i64)) to i32`. It is incorrect since `3202030857` is not a signed 32-bit
constant. It will produce wrong result when `X == 2`:
https://alive2.llvm.org/ce/z/pzfGZZ

This patch adds additional `sexti32` checks to operands of
`PatGprGpr_32`.
Alive2 proof: https://alive2.llvm.org/ce/z/AkH5Mp

Fix #107414.

(cherry picked from commit a111f9119a5ec77c19a514ec09454218f739454f)
---
 .../Target/LoongArch/LoongArchInstrInfo.td    |  5 +-
 .../ir-instruction/sdiv-udiv-srem-urem.ll     | 67 ++++++++++++++++++-
 2 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index ef647a42778737..339d50bd819217 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1065,10 +1065,13 @@ def RDTIME_D : RDTIME_2R<0x00006800>;
 
 /// Generic pattern classes
 
+def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
+  return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32);
+}]>;
 class PatGprGpr<SDPatternOperator OpNode, LAInst Inst>
     : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
 class PatGprGpr_32<SDPatternOperator OpNode, LAInst Inst>
-    : Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>;
+    : Pat<(sext_inreg (OpNode (assertsexti32 GPR:$rj), (assertsexti32 GPR:$rk)), i32), (Inst GPR:$rj, GPR:$rk)>;
 class PatGpr<SDPatternOperator OpNode, LAInst Inst>
     : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>;
 
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index ab3eec240db3c1..c22acdb4969071 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -191,7 +191,8 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    div.w $a0, $a0, $a1
+; LA64-NEXT:    div.d $a0, $a0, $a1
+; LA64-NEXT:    addi.w $a0, $a0, 0
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -207,11 +208,12 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB5_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB5_2: # %entry
+; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = sdiv i32 %a, %b
@@ -1151,3 +1153,64 @@ entry:
   %r = urem i64 %a, %b
   ret i64 %r
 }
+
+define signext i32 @pr107414(i32 signext %x) {
+; LA32-LABEL: pr107414:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    .cfi_def_cfa_offset 16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    .cfi_offset 1, -4
+; LA32-NEXT:    move $a2, $a0
+; LA32-NEXT:    srai.w $a3, $a0, 31
+; LA32-NEXT:    lu12i.w $a0, -266831
+; LA32-NEXT:    ori $a0, $a0, 3337
+; LA32-NEXT:    move $a1, $zero
+; LA32-NEXT:    bl %plt(__divdi3)
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: pr107414:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a1, -266831
+; LA64-NEXT:    ori $a1, $a1, 3337
+; LA64-NEXT:    lu32i.d $a1, 0
+; LA64-NEXT:    div.d $a0, $a1, $a0
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    ret
+;
+; LA32-TRAP-LABEL: pr107414:
+; LA32-TRAP:       # %bb.0: # %entry
+; LA32-TRAP-NEXT:    addi.w $sp, $sp, -16
+; LA32-TRAP-NEXT:    .cfi_def_cfa_offset 16
+; LA32-TRAP-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-TRAP-NEXT:    .cfi_offset 1, -4
+; LA32-TRAP-NEXT:    move $a2, $a0
+; LA32-TRAP-NEXT:    srai.w $a3, $a0, 31
+; LA32-TRAP-NEXT:    lu12i.w $a0, -266831
+; LA32-TRAP-NEXT:    ori $a0, $a0, 3337
+; LA32-TRAP-NEXT:    move $a1, $zero
+; LA32-TRAP-NEXT:    bl %plt(__divdi3)
+; LA32-TRAP-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-TRAP-NEXT:    addi.w $sp, $sp, 16
+; LA32-TRAP-NEXT:    ret
+;
+; LA64-TRAP-LABEL: pr107414:
+; LA64-TRAP:       # %bb.0: # %entry
+; LA64-TRAP-NEXT:    lu12i.w $a1, -266831
+; LA64-TRAP-NEXT:    ori $a1, $a1, 3337
+; LA64-TRAP-NEXT:    lu32i.d $a1, 0
+; LA64-TRAP-NEXT:    div.d $a1, $a1, $a0
+; LA64-TRAP-NEXT:    bnez $a0, .LBB32_2
+; LA64-TRAP-NEXT:  # %bb.1: # %entry
+; LA64-TRAP-NEXT:    break 7
+; LA64-TRAP-NEXT:  .LBB32_2: # %entry
+; LA64-TRAP-NEXT:    addi.w $a0, $a1, 0
+; LA64-TRAP-NEXT:    ret
+entry:
+  %conv = sext i32 %x to i64
+  %div = sdiv i64 3202030857, %conv
+  %conv1 = trunc i64 %div to i32
+  ret i32 %conv1
+}

>From 99058521d4c80635f60b2c1442b683395e0ee818 Mon Sep 17 00:00:00 2001
From: hev <wangrui at loongson.cn>
Date: Tue, 10 Sep 2024 16:52:21 +0800
Subject: [PATCH 2/2] [LoongArch] Eliminate the redundant sign extension of
 division (#107971)

If all incoming values of `div.d` are sign-extended and all users only
use the lower 32 bits, then convert them to W versions.

Fixes: #107946
(cherry picked from commit 0f47e3aebdd2a4a938468a272ea4224552dbf176)
---
 llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp | 15 +++++++++++++++
 .../ir-instruction/sdiv-udiv-srem-urem.ll         |  6 ++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
index abac69054f3b91..ab90409fdf47d0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
@@ -637,6 +637,19 @@ static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST,
         break;
       }
       return false;
+    // If all incoming values are sign-extended and all users only use
+    // the lower 32 bits, then convert them to W versions.
+    case LoongArch::DIV_D: {
+      if (!AddRegToWorkList(MI->getOperand(1).getReg()))
+        return false;
+      if (!AddRegToWorkList(MI->getOperand(2).getReg()))
+        return false;
+      if (hasAllWUsers(*MI, ST, MRI)) {
+        FixableDef.insert(MI);
+        break;
+      }
+      return false;
+    }
     }
   }
 
@@ -651,6 +664,8 @@ static unsigned getWOp(unsigned Opcode) {
     return LoongArch::ADDI_W;
   case LoongArch::ADD_D:
     return LoongArch::ADD_W;
+  case LoongArch::DIV_D:
+    return LoongArch::DIV_W;
   case LoongArch::LD_D:
   case LoongArch::LD_WU:
     return LoongArch::LD_W;
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index c22acdb4969071..c5af79157eaadc 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -191,8 +191,7 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    div.d $a0, $a0, $a1
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -208,12 +207,11 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB5_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB5_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = sdiv i32 %a, %b



More information about the llvm-branch-commits mailing list