[llvm] [SelectionDAG] Detect impossible conditions using known bits analysis (PR #150715)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 25 15:52:11 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: AZero13 (AZero13)

<details>
<summary>Changes</summary>



---

Patch is 107.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150715.diff


3 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+52) 
- (modified) llvm/test/CodeGen/AArch64/arm64-ccmp.ll (+4-16) 
- (modified) llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll (+2105) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0e8e4c9618bb2..ca29e6fe1fb40 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13511,6 +13511,58 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
   SDLoc DL(N);
 
+  // Fold setcc with a constant RHS when known bits prove it always true/false.
+  if (N1.getOpcode() == ISD::Constant) {
+    ConstantSDNode *N1C = cast<ConstantSDNode>(N1);
+    APInt C1 = N1C->getAPIntValue();
+    KnownBits KnownRHS = KnownBits::makeConstant(C1);
+    
+    // Bail out early if RHS is unknown (shouldn't happen for constants)
+    if (KnownRHS.isUnknown())
+      return SDValue();
+    
+    std::optional<bool> KnownVal;
+    
+    // Handle special cases first (like GlobalISel does)
+    if (KnownRHS.isZero()) {
+      // x >=u 0 -> always true
+      // x <u 0 -> always false  
+      if (Cond == ISD::SETUGE)
+        KnownVal = true;
+      else if (Cond == ISD::SETULT)
+        KnownVal = false;
+    }
+    
+    // If not handled by special cases, use ICmpInst::compare
+    if (!KnownVal) {
+      KnownBits KnownLHS = DAG.computeKnownBits(N0);
+      
+      // Convert ISD::CondCode to CmpInst::Predicate
+      CmpInst::Predicate Pred;
+      switch (Cond) {
+      case ISD::SETEQ:  Pred = CmpInst::ICMP_EQ; break;
+      case ISD::SETNE:  Pred = CmpInst::ICMP_NE; break;
+      case ISD::SETULT: Pred = CmpInst::ICMP_ULT; break;
+      case ISD::SETULE: Pred = CmpInst::ICMP_ULE; break;
+      case ISD::SETUGT: Pred = CmpInst::ICMP_UGT; break;
+      case ISD::SETUGE: Pred = CmpInst::ICMP_UGE; break;
+      case ISD::SETLT:  Pred = CmpInst::ICMP_SLT; break;
+      case ISD::SETLE:  Pred = CmpInst::ICMP_SLE; break;
+      case ISD::SETGT:  Pred = CmpInst::ICMP_SGT; break;
+      case ISD::SETGE:  Pred = CmpInst::ICMP_SGE; break;
+      default: 
+        return SDValue(); // Unsupported predicate
+      }
+      
+      // Use the same logic as GlobalISel: ICmpInst::compare
+      KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
+    }
+    
+    // If the comparison result is known, replace with constant
+    if (KnownVal)
+      return DAG.getConstant(*KnownVal ? 1 : 0, DL, VT);
+  }
+
   if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
     // If we prefer to have a setcc, and we don't, we'll try our best to
     // recreate one using rebuildSetCC.
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 06e957fdcc6a2..9b22abcc94d3b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -597,22 +597,10 @@ define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) {
 }
 
 define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
-; SDISEL-LABEL: select_noccmp1:
-; SDISEL:       ; %bb.0:
-; SDISEL-NEXT:    cmp x0, #0
-; SDISEL-NEXT:    ccmp x0, #13, #4, lt
-; SDISEL-NEXT:    cset w8, gt
-; SDISEL-NEXT:    cmp x2, #2
-; SDISEL-NEXT:    ccmp x2, #4, #4, lt
-; SDISEL-NEXT:    csinc w8, w8, wzr, le
-; SDISEL-NEXT:    cmp w8, #0
-; SDISEL-NEXT:    csel x0, xzr, x3, ne
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: select_noccmp1:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov x0, x3
-; GISEL-NEXT:    ret
+; CHECK-LABEL: select_noccmp1:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov x0, x3
+; CHECK-NEXT:    ret
   %c0 = icmp slt i64 %v1, 0
   %c1 = icmp sgt i64 %v1, 13
   %c2 = icmp slt i64 %v3, 2
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index 40cff44d6d3e6..81868b6e01e74 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MUBUF %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-MUBUF,MUBUF %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca,+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLATSCR %s
@@ -21,6 +22,46 @@
 
 ; GCN: ds_write_b32 v0, v0
 define void @func_mov_fi_i32() #0 {
+; CI-LABEL: func_mov_fi_i32:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    v_lshr_b32_e64 v0, s32, 6
+; CI-NEXT:    s_mov_b32 m0, -1
+; CI-NEXT:    ds_write_b32 v0, v0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_mov_fi_i32:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
+; GFX9-MUBUF-NEXT:    ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_mov_fi_i32:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, s32
+; GFX9-FLATSCR-NEXT:    ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_mov_fi_i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v0, s32
+; GFX11-TRUE16-NEXT:    ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_mov_fi_i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, s32
+; GFX11-FAKE16-NEXT:    ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca i32, addrspace(5)
   store volatile ptr addrspace(5) %alloca, ptr addrspace(3) poison
   ret void
@@ -46,6 +87,61 @@ define void @func_mov_fi_i32() #0 {
 ; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, [[ADD]]
 ; GFX9-NEXT:         ds_write_b32 v0, v0
 define void @func_mov_fi_i32_offset() #0 {
+; CI-LABEL: func_mov_fi_i32_offset:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    v_lshr_b32_e64 v0, s32, 6
+; CI-NEXT:    s_mov_b32 m0, -1
+; CI-NEXT:    ds_write_b32 v0, v0
+; CI-NEXT:    v_lshr_b32_e64 v0, s32, 6
+; CI-NEXT:    v_add_i32_e32 v0, vcc, 4, v0
+; CI-NEXT:    ds_write_b32 v0, v0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_mov_fi_i32_offset:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
+; GFX9-MUBUF-NEXT:    ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v0, 4, v0
+; GFX9-MUBUF-NEXT:    ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_mov_fi_i32_offset:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, s32
+; GFX9-FLATSCR-NEXT:    s_add_i32 s0, s32, 4
+; GFX9-FLATSCR-NEXT:    ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-FLATSCR-NEXT:    ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_mov_fi_i32_offset:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_add_i32 s0, s32, 4
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT:    v_dual_mov_b32 v0, s32 :: v_dual_mov_b32 v1, s0
+; GFX11-TRUE16-NEXT:    ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT:    ds_store_b32 v0, v1
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_mov_fi_i32_offset:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_add_i32 s0, s32, 4
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v0, s32 :: v_dual_mov_b32 v1, s0
+; GFX11-FAKE16-NEXT:    ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT:    ds_store_b32 v0, v1
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %alloca0 = alloca i32, addrspace(5)
   %alloca1 = alloca i32, addrspace(5)
   store volatile ptr addrspace(5) %alloca0, ptr addrspace(3) poison
@@ -71,6 +167,48 @@ define void @func_mov_fi_i32_offset() #0 {
 ; GCN-NOT: v_mov
 ; GCN: ds_write_b32 v0, v0
 define void @func_add_constant_to_fi_i32() #0 {
+; CI-LABEL: func_add_constant_to_fi_i32:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    v_lshr_b32_e64 v1, s32, 6
+; CI-NEXT:    v_add_i32_e32 v0, vcc, 4, v1
+; CI-NEXT:    s_mov_b32 m0, -1
+; CI-NEXT:    ds_write_b32 v0, v0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_add_constant_to_fi_i32:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT:    v_lshrrev_b32_e64 v1, 6, s32
+; GFX9-MUBUF-NEXT:    v_add_u32_e32 v0, 4, v1
+; GFX9-MUBUF-NEXT:    ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_add_constant_to_fi_i32:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_add_u32_e64 v0, 4, s32
+; GFX9-FLATSCR-NEXT:    ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_add_constant_to_fi_i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_add_nc_u32_e64 v0, 4, s32
+; GFX11-TRUE16-NEXT:    ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_add_constant_to_fi_i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_add_nc_u32_e64 v0, 4, s32
+; GFX11-FAKE16-NEXT:    ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca [2 x i32], align 4, addrspace(5)
   %gep0 = getelementptr inbounds [2 x i32], ptr addrspace(5) %alloca, i32 0, i32 1
   store volatile ptr addrspace(5) %gep0, ptr addrspace(3) poison
@@ -93,6 +231,55 @@ define void @func_add_constant_to_fi_i32() #0 {
 ; GCN-NOT: v_mov
 ; GCN: ds_write_b32 v0, v0
 define void @func_other_fi_user_i32() #0 {
+; CI-LABEL: func_other_fi_user_i32:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    s_lshr_b32 s5, s32, 6
+; CI-NEXT:    s_mul_i32 s4, s5, 9
+; CI-NEXT:    v_mov_b32_e32 v0, s4
+; CI-NEXT:    s_mov_b32 m0, -1
+; CI-NEXT:    ds_write_b32 v0, v0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_other_fi_user_i32:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT:    s_lshr_b32 s5, s32, 6
+; GFX9-MUBUF-NEXT:    s_mul_i32 s4, s5, 9
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-MUBUF-NEXT:    ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_other_fi_user_i32:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    s_mul_i32 s0, s32, 9
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-FLATSCR-NEXT:    ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_other_fi_user_i32:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mul_i32 s0, s32, 9
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-TRUE16-NEXT:    ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_other_fi_user_i32:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mul_i32 s0, s32, 9
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, s0
+; GFX11-FAKE16-NEXT:    ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca [2 x i32], align 4, addrspace(5)
   %ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32
   %mul = mul i32 %ptrtoint, 9
@@ -105,6 +292,45 @@ define void @func_other_fi_user_i32() #0 {
 ; MUBUF:        buffer_store_dword v1, v0, s[0:3], 0 offen{{$}}
 ; GFX9-FLATSCR: scratch_store_dword v0, v1, off{{$}}
 define void @func_store_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
+; CI-LABEL: func_store_private_arg_i32_ptr:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v1, 15
+; CI-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
+; CI-NEXT:    s_waitcnt vmcnt(0)
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_store_private_arg_i32_ptr:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v1, 15
+; GFX9-MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_store_private_arg_i32_ptr:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_mov_b32_e32 v1, 15
+; GFX9-FLATSCR-NEXT:    scratch_store_dword v0, v1, off
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_store_private_arg_i32_ptr:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, 15
+; GFX11-TRUE16-NEXT:    scratch_store_b32 v0, v1, off dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_store_private_arg_i32_ptr:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v1, 15
+; GFX11-FAKE16-NEXT:    scratch_store_b32 v0, v1, off dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   store volatile i32 15, ptr addrspace(5) %ptr
   ret void
 }
@@ -114,6 +340,40 @@ define void @func_store_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
 ; MUBUF-NEXT:        buffer_load_dword v0, v0, s[0:3], 0 offen glc{{$}}
 ; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off glc{{$}}
 define void @func_load_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
+; CI-LABEL: func_load_private_arg_i32_ptr:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen glc
+; CI-NEXT:    s_waitcnt vmcnt(0)
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_load_private_arg_i32_ptr:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen glc
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_load_private_arg_i32_ptr:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    scratch_load_dword v0, v0, off glc
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_load_private_arg_i32_ptr:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v0, v0, off glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_load_private_arg_i32_ptr:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v0, v0, off glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = load volatile i32, ptr addrspace(5) %ptr
   ret void
 }
@@ -132,6 +392,48 @@ define void @func_load_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
 ; GCN-NOT: v_mov
 ; GCN: ds_write_b32 v0, v0
 define void @void_func_byval_struct_i8_i32_ptr(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
+; CI-LABEL: void_func_byval_struct_i8_i32_ptr:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    v_lshr_b32_e64 v1, s32, 6
+; CI-NEXT:    v_or_b32_e32 v0, 4, v1
+; CI-NEXT:    s_mov_b32 m0, -1
+; CI-NEXT:    ds_write_b32 v0, v0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: void_func_byval_struct_i8_i32_ptr:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT:    v_lshrrev_b32_e64 v1, 6, s32
+; GFX9-MUBUF-NEXT:    v_or_b32_e32 v0, 4, v1
+; GFX9-MUBUF-NEXT:    ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: void_func_byval_struct_i8_i32_ptr:
+; GFX9-FLATSCR:       ; %bb.0:
+; GFX9-FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    v_or_b32_e64 v0, s32, 4
+; GFX9-FLATSCR-NEXT:    ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: void_func_byval_struct_i8_i32_ptr:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_or_b32_e64 v0, s32, 4
+; GFX11-TRUE16-NEXT:    ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: void_func_byval_struct_i8_i32_ptr:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_or_b32_e64 v0, s32, 4
+; GFX11-FAKE16-NEXT:    ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0
   %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1
   %load1 = load i32, ptr addrspace(5) %gep1
@@ -146,6 +448,68 @@ define void @void_func_byval_struct_i8_i32_ptr(ptr addrspace(5) byval({ i8, i32
 ; GFX9-FLATSCR-NEXT: scratch_load_ubyte v0, off, s32
 ; GFX9-FLATSCR-NEXT: scratch_load_dword v1, off, s32 offset:4
 define void @void_func_byval_struct_i8_i32_ptr_value(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
+; CI-LABEL: void_func_byval_struct_i8_i32_ptr_value:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    buffer_load_ubyte v0, off, s[0:3], s32
+; CI-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:4
+; CI-NEXT:    s_mov_b32 m0, -1
+; CI-NEXT:    s_waitcnt vmcnt(1)
+; CI-NEXT:    ds_write_b8 v0, v0
+; CI-NEXT:    s_waitcnt vmcnt(0)
+; CI-NEXT:    ds_write_b32 v0, v1
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: void_func_byval_struct_i8_i32_ptr_value:
+; GFX9-MUBUF:       ; %bb.0:
+; GFX9-MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT:    buffer_load_ubyte v0, off, s[0:3], s32
+; GFX9-MU...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/150715


More information about the llvm-commits mailing list