[llvm] 2bc9358 - [DAG] Constant Folding for U/SMUL_LOHI (#69437)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 23 22:37:59 PDT 2023


Author: Pierre van Houtryve
Date: 2023-10-24T07:37:55+02:00
New Revision: 2bc93584f5193d78177273c45c82ea69675f44e7

URL: https://github.com/llvm/llvm-project/commit/2bc93584f5193d78177273c45c82ea69675f44e7
DIFF: https://github.com/llvm/llvm-project/commit/2bc93584f5193d78177273c45c82ea69675f44e7.diff

LOG: [DAG] Constant Folding for U/SMUL_LOHI (#69437)

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/test/CodeGen/AMDGPU/udiv.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1add486255b8977..c0a737b70c460e7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5351,6 +5351,10 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
 
+  // Constant fold.
+  if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
+    return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
+
   // canonicalize constant to RHS (vector doesn't have to splat)
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
@@ -5389,6 +5393,10 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
 
+  // Constant fold.
+  if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
+    return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
+
   // canonicalize constant to RHS (vector doesn't have to splat)
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       !DAG.isConstantIntBuildVectorOrConstantInt(N1))

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 1ee2f0337392f98..fcc485dbb2cc512 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -9903,6 +9903,28 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
            VTList.VTs[0] == Ops[0].getValueType() &&
            VTList.VTs[0] == Ops[1].getValueType() &&
            "Binary operator types must match!");
+    // Constant fold.
+    ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(Ops[0]);
+    ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ops[1]);
+    if (LHS && RHS) {
+      unsigned Width = VTList.VTs[0].getScalarSizeInBits();
+      unsigned OutWidth = Width * 2;
+      APInt Val = LHS->getAPIntValue();
+      APInt Mul = RHS->getAPIntValue();
+      if (Opcode == ISD::SMUL_LOHI) {
+        Val = Val.sext(OutWidth);
+        Mul = Mul.sext(OutWidth);
+      } else {
+        Val = Val.zext(OutWidth);
+        Mul = Mul.zext(OutWidth);
+      }
+      Val *= Mul;
+
+      SDValue Hi =
+          getConstant(Val.extractBits(Width, Width), DL, VTList.VTs[0]);
+      SDValue Lo = getConstant(Val.trunc(Width), DL, VTList.VTs[0]);
+      return getNode(ISD::MERGE_VALUES, DL, VTList, {Lo, Hi}, Flags);
+    }
     break;
   }
   case ISD::FFREXP: {

diff  --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
index 5d1db03a1a35bbd..0ca8f8aace983c5 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -2598,37 +2598,38 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
 ; VI-LABEL: v_test_udiv64_mulhi_fold:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v4, 0xa7c5
-; VI-NEXT:    v_mul_u32_u24_e32 v3, 0x500, v4
-; VI-NEXT:    v_mul_hi_u32_u24_e32 v2, 0x500, v4
-; VI-NEXT:    v_add_u32_e32 v3, vcc, 0x4237, v3
-; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v2, vcc
-; VI-NEXT:    v_add_u32_e32 v6, vcc, 0xa9000000, v3
-; VI-NEXT:    s_mov_b32 s6, 0xfffe7960
-; VI-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
-; VI-NEXT:    v_addc_u32_e32 v7, vcc, v5, v4, vcc
-; VI-NEXT:    v_mul_lo_u32 v4, v7, s6
+; VI-NEXT:    s_mov_b32 s4, 0x346d900
+; VI-NEXT:    s_add_u32 s4, 0x4237, s4
+; VI-NEXT:    v_mov_b32_e32 v2, 0xa9000000
+; VI-NEXT:    v_add_u32_e32 v6, vcc, s4, v2
+; VI-NEXT:    s_mov_b32 s4, 0xfffe7960
+; VI-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, s4, 0
+; VI-NEXT:    s_addc_u32 s6, 0, 0
+; VI-NEXT:    s_cmp_lg_u64 vcc, 0
+; VI-NEXT:    s_addc_u32 s6, s6, 0xa7c5
+; VI-NEXT:    s_mul_i32 s4, s6, 0xfffe7960
 ; VI-NEXT:    v_sub_u32_e32 v3, vcc, v3, v6
-; VI-NEXT:    v_mul_hi_u32 v8, v6, v2
-; VI-NEXT:    v_add_u32_e32 v5, vcc, v4, v3
+; VI-NEXT:    v_add_u32_e32 v5, vcc, s4, v3
 ; VI-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
-; VI-NEXT:    v_add_u32_e32 v8, vcc, v8, v3
-; VI-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0
-; VI-NEXT:    v_addc_u32_e32 v9, vcc, 0, v4, vcc
-; VI-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v7, v5, 0
-; VI-NEXT:    v_add_u32_e32 v2, vcc, v8, v2
-; VI-NEXT:    v_addc_u32_e32 v2, vcc, v9, v3, vcc
+; VI-NEXT:    v_mul_hi_u32 v7, v6, v2
+; VI-NEXT:    v_add_u32_e32 v7, vcc, v7, v3
+; VI-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], s6, v2, 0
+; VI-NEXT:    v_addc_u32_e32 v8, vcc, 0, v4, vcc
+; VI-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], s6, v5, 0
+; VI-NEXT:    v_add_u32_e32 v2, vcc, v7, v2
+; VI-NEXT:    v_addc_u32_e32 v2, vcc, v8, v3, vcc
 ; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v5, vcc
 ; VI-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
 ; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; VI-NEXT:    v_add_u32_e32 v4, vcc, v6, v2
-; VI-NEXT:    v_addc_u32_e32 v5, vcc, v7, v3, vcc
-; VI-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v0, v5, 0
-; VI-NEXT:    v_mul_hi_u32 v6, v0, v4
+; VI-NEXT:    v_mov_b32_e32 v4, s6
+; VI-NEXT:    v_add_u32_e32 v5, vcc, v6, v2
+; VI-NEXT:    v_addc_u32_e32 v4, vcc, v4, v3, vcc
+; VI-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v0, v4, 0
+; VI-NEXT:    v_mul_hi_u32 v6, v0, v5
 ; VI-NEXT:    v_add_u32_e32 v6, vcc, v6, v2
 ; VI-NEXT:    v_addc_u32_e32 v7, vcc, 0, v3, vcc
-; VI-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v1, v4, 0
-; VI-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v1, v5, 0
+; VI-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v1, v5, 0
+; VI-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v1, v4, 0
 ; VI-NEXT:    v_add_u32_e32 v2, vcc, v6, v2
 ; VI-NEXT:    v_addc_u32_e32 v2, vcc, v7, v3, vcc
 ; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v5, vcc


        


More information about the llvm-commits mailing list