[llvm] 74f0ec5 - [DAGCombiner] Make it so that `udiv` can be folded with `(select c, NonZero, 1)`

Noah Goldstein via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 12 15:18:14 PDT 2023


Author: Noah Goldstein
Date: 2023-07-12T17:17:53-05:00
New Revision: 74f0ec5e2435fba1a6ad35f78548cb0d706a1bca

URL: https://github.com/llvm/llvm-project/commit/74f0ec5e2435fba1a6ad35f78548cb0d706a1bca
DIFF: https://github.com/llvm/llvm-project/commit/74f0ec5e2435fba1a6ad35f78548cb0d706a1bca.diff

LOG: [DAGCombiner] Make it so that `udiv` can be folded with `(select c, NonZero, 1)`

This is done by allowing speculation of `udiv` if we can prove the
denominator is non-zero.

https://alive2.llvm.org/ce/z/VNCt_q

Differential Revision: https://reviews.llvm.org/D149198

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/SelectionDAG.h
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
    llvm/test/CodeGen/X86/divrem-by-select.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 67904a8043a8e6..55c6354f03c854 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2356,6 +2356,19 @@ class SelectionDAG {
     }
   }
 
+  /// Check if the provided node is safe to speculatively execute given its
+  /// current arguments. So, while the `udiv` opcode is not safe to
+  /// speculatively execute, a given `udiv` node may be if the denominator is
+  /// known nonzero.
+  bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const {
+    switch (N->getOpcode()) {
+    case ISD::UDIV:
+      return isKnownNeverZero(N->getOperand(1));
+    default:
+      return isSafeToSpeculativelyExecute(N->getOpcode());
+    }
+  }
+
   SDValue makeStateFunctionCall(unsigned LibFunc, SDValue Ptr, SDValue InChain,
                                 const SDLoc &DLoc);
 

diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c1c24012d4f9ce..aad4e4a2d3f75a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2434,11 +2434,12 @@ static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
   if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
     return SDValue();
 
-  // We can't hoist div/rem because of immediate UB (not speculatable).
-  unsigned Opcode = N->getOpcode();
-  if (!DAG.isSafeToSpeculativelyExecute(Opcode))
+  // We can't hoist all instructions because of immediate UB (not speculatable).
+  // For example div/rem by zero.
+  if (!DAG.isSafeToSpeculativelyExecuteNode(N))
     return SDValue();
 
+  unsigned Opcode = N->getOpcode();
   EVT VT = N->getValueType(0);
   SDValue Cond = N1.getOperand(0);
   SDValue TVal = N1.getOperand(1);

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
index df80e60cfca18c..90b8a7fa70b32e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
@@ -1231,13 +1231,29 @@ define <vscale x 8 x i32> @vdivu_vx_mask_nxv8i32(<vscale x 8 x i32> %va, i32 sig
 }
 
 define <vscale x 8 x i32> @vdivu_vi_mask_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %mask) {
-; CHECK-LABEL: vdivu_vi_mask_nxv8i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmerge.vim v12, v12, 7, v0
-; CHECK-NEXT:    vdivu.vv v8, v8, v12
-; CHECK-NEXT:    ret
+; RV32-LABEL: vdivu_vi_mask_nxv8i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, 149797
+; RV32-NEXT:    addi a0, a0, -1755
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vmulhu.vx v12, v8, a0
+; RV32-NEXT:    vsub.vv v16, v8, v12
+; RV32-NEXT:    vsrl.vi v16, v16, 1
+; RV32-NEXT:    vadd.vv v12, v16, v12
+; RV32-NEXT:    vsrl.vi v8, v12, 2, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vdivu_vi_mask_nxv8i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a0, 149797
+; RV64-NEXT:    addiw a0, a0, -1755
+; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV64-NEXT:    vmulhu.vx v12, v8, a0
+; RV64-NEXT:    vsub.vv v16, v8, v12
+; RV64-NEXT:    vsrl.vi v16, v16, 1
+; RV64-NEXT:    vadd.vv v12, v16, v12
+; RV64-NEXT:    vsrl.vi v8, v12, 2, v0.t
+; RV64-NEXT:    ret
   %head1 = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
   %one = shufflevector <vscale x 8 x i32> %head1, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
   %head2 = insertelement <vscale x 8 x i32> poison, i32 7, i32 0

diff  --git a/llvm/test/CodeGen/X86/divrem-by-select.ll b/llvm/test/CodeGen/X86/divrem-by-select.ll
index 5c6c291d02087a..16dea9a380c1f7 100644
--- a/llvm/test/CodeGen/X86/divrem-by-select.ll
+++ b/llvm/test/CodeGen/X86/divrem-by-select.ll
@@ -6,7 +6,8 @@ define <2 x i64> @udiv_identity_const(<2 x i1> %c, <2 x i64> %x) {
 ; CHECK-X64-V3-LABEL: udiv_identity_const:
 ; CHECK-X64-V3:       # %bb.0:
 ; CHECK-X64-V3-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT:    vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    # xmm2 = mem[0,0]
 ; CHECK-X64-V3-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm0, %rcx
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm1, %rax
@@ -26,20 +27,16 @@ define <2 x i64> @udiv_identity_const(<2 x i1> %c, <2 x i64> %x) {
 ; CHECK-X64-V4:       # %bb.0:
 ; CHECK-X64-V4-NEXT:    vpsllq $63, %xmm0, %xmm0
 ; CHECK-X64-V4-NEXT:    vpmovq2m %xmm0, %k1
-; CHECK-X64-V4-NEXT:    vpbroadcastq {{.*#+}} xmm0 = [1,1]
-; CHECK-X64-V4-NEXT:    vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1}
-; CHECK-X64-V4-NEXT:    vpextrq $1, %xmm0, %rcx
-; CHECK-X64-V4-NEXT:    vpextrq $1, %xmm1, %rax
-; CHECK-X64-V4-NEXT:    xorl %edx, %edx
-; CHECK-X64-V4-NEXT:    divq %rcx
-; CHECK-X64-V4-NEXT:    movq %rax, %rcx
-; CHECK-X64-V4-NEXT:    vmovq %xmm0, %rsi
-; CHECK-X64-V4-NEXT:    vmovq %xmm1, %rax
-; CHECK-X64-V4-NEXT:    xorl %edx, %edx
-; CHECK-X64-V4-NEXT:    divq %rsi
+; CHECK-X64-V4-NEXT:    vpextrq $1, %xmm1, %rdx
+; CHECK-X64-V4-NEXT:    movabsq $3353953467947191203, %rax # imm = 0x2E8BA2E8BA2E8BA3
+; CHECK-X64-V4-NEXT:    mulxq %rax, %rcx, %rcx
 ; CHECK-X64-V4-NEXT:    vmovq %rcx, %xmm0
-; CHECK-X64-V4-NEXT:    vmovq %rax, %xmm1
-; CHECK-X64-V4-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-X64-V4-NEXT:    vmovq %xmm1, %rdx
+; CHECK-X64-V4-NEXT:    mulxq %rax, %rax, %rax
+; CHECK-X64-V4-NEXT:    vmovq %rax, %xmm2
+; CHECK-X64-V4-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; CHECK-X64-V4-NEXT:    vpsrlq $1, %xmm0, %xmm1 {%k1}
+; CHECK-X64-V4-NEXT:    vmovdqa %xmm1, %xmm0
 ; CHECK-X64-V4-NEXT:    retq
   %d = select <2 x i1> %c, <2 x i64> <i64 11, i64 11>, <2 x i64> <i64 1, i64 1>
   %r = udiv <2 x i64> %x, %d
@@ -51,7 +48,8 @@ define <2 x i64> @udiv_identity_const_todo_getter_nonzero(<2 x i1> %c, <2 x i64>
 ; CHECK-X64-V3-LABEL: udiv_identity_const_todo_getter_nonzero:
 ; CHECK-X64-V3:       # %bb.0:
 ; CHECK-X64-V3-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT:    vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    # xmm2 = mem[0,0]
 ; CHECK-X64-V3-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm0, %rcx
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm1, %rax
@@ -101,7 +99,8 @@ define <2 x i64> @udiv_indentity_non_zero(<2 x i1> %c, <2 x i64> %x, <2 x i64> %
 ; CHECK-X64-V3-NEXT:    vpsllq $63, %xmm0, %xmm0
 ; CHECK-X64-V3-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; CHECK-X64-V3-NEXT:    vpsubq %xmm3, %xmm2, %xmm2
-; CHECK-X64-V3-NEXT:    vmovapd {{.*#+}} xmm3 = [1,1]
+; CHECK-X64-V3-NEXT:    vmovddup {{.*#+}} xmm3 = [1,1]
+; CHECK-X64-V3-NEXT:    # xmm3 = mem[0,0]
 ; CHECK-X64-V3-NEXT:    vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm0, %rcx
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm1, %rax
@@ -189,7 +188,8 @@ define <2 x i64> @udiv_indentity_partial_zero(<2 x i1> %c, <2 x i64> %x) {
 ; CHECK-X64-V3-LABEL: udiv_indentity_partial_zero:
 ; CHECK-X64-V3:       # %bb.0:
 ; CHECK-X64-V3-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT:    vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    # xmm2 = mem[0,0]
 ; CHECK-X64-V3-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm0, %rcx
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm1, %rax
@@ -233,7 +233,8 @@ define <2 x i64> @urem_identity_const(<2 x i1> %c, <2 x i64> %x) {
 ; CHECK-X64-V3-LABEL: urem_identity_const:
 ; CHECK-X64-V3:       # %bb.0:
 ; CHECK-X64-V3-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT:    vmovapd {{.*#+}} xmm2 = [11,11]
+; CHECK-X64-V3-NEXT:    vmovddup {{.*#+}} xmm2 = [11,11]
+; CHECK-X64-V3-NEXT:    # xmm2 = mem[0,0]
 ; CHECK-X64-V3-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm0, %rcx
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm1, %rax
@@ -277,7 +278,8 @@ define <2 x i64> @sdiv_identity_const(<2 x i1> %c, <2 x i64> %x) {
 ; CHECK-X64-V3-LABEL: sdiv_identity_const:
 ; CHECK-X64-V3:       # %bb.0:
 ; CHECK-X64-V3-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT:    vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    # xmm2 = mem[0,0]
 ; CHECK-X64-V3-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm0, %rcx
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm1, %rax
@@ -321,7 +323,8 @@ define <2 x i64> @sdiv_identity_const_todo_better_nonzero(<2 x i1> %c, <2 x i64>
 ; CHECK-X64-V3-LABEL: sdiv_identity_const_todo_better_nonzero:
 ; CHECK-X64-V3:       # %bb.0:
 ; CHECK-X64-V3-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT:    vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    # xmm2 = mem[0,0]
 ; CHECK-X64-V3-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm0, %rcx
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm1, %rax
@@ -365,7 +368,8 @@ define <2 x i64> @srem_identity_const(<2 x i1> %c, <2 x i64> %x) {
 ; CHECK-X64-V3-LABEL: srem_identity_const:
 ; CHECK-X64-V3:       # %bb.0:
 ; CHECK-X64-V3-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT:    vmovapd {{.*#+}} xmm2 = [11,11]
+; CHECK-X64-V3-NEXT:    vmovddup {{.*#+}} xmm2 = [11,11]
+; CHECK-X64-V3-NEXT:    # xmm2 = mem[0,0]
 ; CHECK-X64-V3-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm0, %rcx
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm1, %rax
@@ -409,7 +413,8 @@ define <2 x i64> @udivrem_identity_const(<2 x i1> %c, <2 x i64> %x) {
 ; CHECK-X64-V3-LABEL: udivrem_identity_const:
 ; CHECK-X64-V3:       # %bb.0:
 ; CHECK-X64-V3-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT:    vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    # xmm2 = mem[0,0]
 ; CHECK-X64-V3-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm0, %rcx
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm1, %rax
@@ -465,7 +470,8 @@ define <2 x i64> @sdivrem_identity_const(<2 x i1> %c, <2 x i64> %x) {
 ; CHECK-X64-V3-LABEL: sdivrem_identity_const:
 ; CHECK-X64-V3:       # %bb.0:
 ; CHECK-X64-V3-NEXT:    vpsllq $63, %xmm0, %xmm0
-; CHECK-X64-V3-NEXT:    vmovapd {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    vmovddup {{.*#+}} xmm2 = [1,1]
+; CHECK-X64-V3-NEXT:    # xmm2 = mem[0,0]
 ; CHECK-X64-V3-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm0, %rcx
 ; CHECK-X64-V3-NEXT:    vpextrq $1, %xmm1, %rax


        


More information about the llvm-commits mailing list