[llvm] [SelectionDAG] Detect impossible conditions using known bits analysis (PR #150715)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 25 15:52:11 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: AZero13 (AZero13)
<details>
<summary>Changes</summary>
---
Patch is 107.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150715.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+52)
- (modified) llvm/test/CodeGen/AArch64/arm64-ccmp.ll (+4-16)
- (modified) llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll (+2105)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0e8e4c9618bb2..ca29e6fe1fb40 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13511,6 +13511,58 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
SDLoc DL(N);
+ // Detect impossible conditions using known bits analysis.
+ if (N1.getOpcode() == ISD::Constant) {
+ ConstantSDNode *N1C = cast<ConstantSDNode>(N1);
+ APInt C1 = N1C->getAPIntValue();
+ KnownBits KnownRHS = KnownBits::makeConstant(C1);
+
+ // Bail out early if RHS is unknown (shouldn't happen for constants)
+ if (KnownRHS.isUnknown())
+ return SDValue();
+
+ std::optional<bool> KnownVal;
+
+ // Handle special cases first (like GlobalISel does)
+ if (KnownRHS.isZero()) {
+ // x >=u 0 -> always true
+ // x <u 0 -> always false
+ if (Cond == ISD::SETUGE)
+ KnownVal = true;
+ else if (Cond == ISD::SETULT)
+ KnownVal = false;
+ }
+
+ // If not handled by special cases, use ICmpInst::compare
+ if (!KnownVal) {
+ KnownBits KnownLHS = DAG.computeKnownBits(N0);
+
+ // Convert ISD::CondCode to CmpInst::Predicate
+ CmpInst::Predicate Pred;
+ switch (Cond) {
+ case ISD::SETEQ: Pred = CmpInst::ICMP_EQ; break;
+ case ISD::SETNE: Pred = CmpInst::ICMP_NE; break;
+ case ISD::SETULT: Pred = CmpInst::ICMP_ULT; break;
+ case ISD::SETULE: Pred = CmpInst::ICMP_ULE; break;
+ case ISD::SETUGT: Pred = CmpInst::ICMP_UGT; break;
+ case ISD::SETUGE: Pred = CmpInst::ICMP_UGE; break;
+ case ISD::SETLT: Pred = CmpInst::ICMP_SLT; break;
+ case ISD::SETLE: Pred = CmpInst::ICMP_SLE; break;
+ case ISD::SETGT: Pred = CmpInst::ICMP_SGT; break;
+ case ISD::SETGE: Pred = CmpInst::ICMP_SGE; break;
+ default:
+ return SDValue(); // Unsupported predicate
+ }
+
+ // Use the same logic as GlobalISel: ICmpInst::compare
+ KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
+ }
+
+ // If the comparison result is known, replace with constant
+ if (KnownVal)
+ return DAG.getConstant(*KnownVal ? 1 : 0, DL, VT);
+ }
+
if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
// If we prefer to have a setcc, and we don't, we'll try our best to
// recreate one using rebuildSetCC.
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 06e957fdcc6a2..9b22abcc94d3b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -597,22 +597,10 @@ define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) {
}
define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
-; SDISEL-LABEL: select_noccmp1:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmp x0, #0
-; SDISEL-NEXT: ccmp x0, #13, #4, lt
-; SDISEL-NEXT: cset w8, gt
-; SDISEL-NEXT: cmp x2, #2
-; SDISEL-NEXT: ccmp x2, #4, #4, lt
-; SDISEL-NEXT: csinc w8, w8, wzr, le
-; SDISEL-NEXT: cmp w8, #0
-; SDISEL-NEXT: csel x0, xzr, x3, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: select_noccmp1:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x0, x3
-; GISEL-NEXT: ret
+; CHECK-LABEL: select_noccmp1:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: mov x0, x3
+; CHECK-NEXT: ret
%c0 = icmp slt i64 %v1, 0
%c1 = icmp sgt i64 %v1, 13
%c2 = icmp slt i64 %v3, 2
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index 40cff44d6d3e6..81868b6e01e74 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-MUBUF,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca,+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLATSCR %s
@@ -21,6 +22,46 @@
; GCN: ds_write_b32 v0, v0
define void @func_mov_fi_i32() #0 {
+; CI-LABEL: func_mov_fi_i32:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_lshr_b32_e64 v0, s32, 6
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_mov_fi_i32:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_mov_fi_i32:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s32
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_mov_fi_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, s32
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_mov_fi_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s32
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
store volatile ptr addrspace(5) %alloca, ptr addrspace(3) poison
ret void
@@ -46,6 +87,61 @@ define void @func_mov_fi_i32() #0 {
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, [[ADD]]
; GFX9-NEXT: ds_write_b32 v0, v0
define void @func_mov_fi_i32_offset() #0 {
+; CI-LABEL: func_mov_fi_i32_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_lshr_b32_e64 v0, s32, 6
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: v_lshr_b32_e64 v0, s32, 6
+; CI-NEXT: v_add_i32_e32 v0, vcc, 4, v0
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_mov_fi_i32_offset:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, v0
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_mov_fi_i32_offset:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s32
+; GFX9-FLATSCR-NEXT: s_add_i32 s0, s32, 4
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_mov_fi_i32_offset:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_add_i32 s0, s32, 4
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s32 :: v_dual_mov_b32 v1, s0
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v1
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_mov_fi_i32_offset:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_add_i32 s0, s32, 4
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s32 :: v_dual_mov_b32 v1, s0
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v1
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca i32, addrspace(5)
%alloca1 = alloca i32, addrspace(5)
store volatile ptr addrspace(5) %alloca0, ptr addrspace(3) poison
@@ -71,6 +167,48 @@ define void @func_mov_fi_i32_offset() #0 {
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
define void @func_add_constant_to_fi_i32() #0 {
+; CI-LABEL: func_add_constant_to_fi_i32:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_lshr_b32_e64 v1, s32, 6
+; CI-NEXT: v_add_i32_e32 v0, vcc, 4, v1
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_add_constant_to_fi_i32:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v1, 6, s32
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, v1
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_add_constant_to_fi_i32:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: v_add_u32_e64 v0, 4, s32
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_add_constant_to_fi_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e64 v0, 4, s32
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_add_constant_to_fi_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e64 v0, 4, s32
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca [2 x i32], align 4, addrspace(5)
%gep0 = getelementptr inbounds [2 x i32], ptr addrspace(5) %alloca, i32 0, i32 1
store volatile ptr addrspace(5) %gep0, ptr addrspace(3) poison
@@ -93,6 +231,55 @@ define void @func_add_constant_to_fi_i32() #0 {
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
define void @func_other_fi_user_i32() #0 {
+; CI-LABEL: func_other_fi_user_i32:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_lshr_b32 s5, s32, 6
+; CI-NEXT: s_mul_i32 s4, s5, 9
+; CI-NEXT: v_mov_b32_e32 v0, s4
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_other_fi_user_i32:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_lshr_b32 s5, s32, 6
+; GFX9-MUBUF-NEXT: s_mul_i32 s4, s5, 9
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_other_fi_user_i32:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_mul_i32 s0, s32, 9
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_other_fi_user_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mul_i32 s0, s32, 9
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_other_fi_user_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mul_i32 s0, s32, 9
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca [2 x i32], align 4, addrspace(5)
%ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32
%mul = mul i32 %ptrtoint, 9
@@ -105,6 +292,45 @@ define void @func_other_fi_user_i32() #0 {
; MUBUF: buffer_store_dword v1, v0, s[0:3], 0 offen{{$}}
; GFX9-FLATSCR: scratch_store_dword v0, v1, off{{$}}
define void @func_store_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
+; CI-LABEL: func_store_private_arg_i32_ptr:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_mov_b32_e32 v1, 15
+; CI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_store_private_arg_i32_ptr:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v1, 15
+; GFX9-MUBUF-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_store_private_arg_i32_ptr:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 15
+; GFX9-FLATSCR-NEXT: scratch_store_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_store_private_arg_i32_ptr:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 15
+; GFX11-TRUE16-NEXT: scratch_store_b32 v0, v1, off dlc
+; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_store_private_arg_i32_ptr:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 15
+; GFX11-FAKE16-NEXT: scratch_store_b32 v0, v1, off dlc
+; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
store volatile i32 15, ptr addrspace(5) %ptr
ret void
}
@@ -114,6 +340,40 @@ define void @func_store_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
; MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc{{$}}
; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off glc{{$}}
define void @func_load_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
+; CI-LABEL: func_load_private_arg_i32_ptr:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_load_private_arg_i32_ptr:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_load_private_arg_i32_ptr:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off glc
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_load_private_arg_i32_ptr:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: scratch_load_b32 v0, v0, off glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_load_private_arg_i32_ptr:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: scratch_load_b32 v0, v0, off glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%val = load volatile i32, ptr addrspace(5) %ptr
ret void
}
@@ -132,6 +392,48 @@ define void @func_load_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
define void @void_func_byval_struct_i8_i32_ptr(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
+; CI-LABEL: void_func_byval_struct_i8_i32_ptr:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_lshr_b32_e64 v1, s32, 6
+; CI-NEXT: v_or_b32_e32 v0, 4, v1
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: void_func_byval_struct_i8_i32_ptr:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v1, 6, s32
+; GFX9-MUBUF-NEXT: v_or_b32_e32 v0, 4, v1
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: void_func_byval_struct_i8_i32_ptr:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: v_or_b32_e64 v0, s32, 4
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: void_func_byval_struct_i8_i32_ptr:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_or_b32_e64 v0, s32, 4
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: void_func_byval_struct_i8_i32_ptr:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e64 v0, s32, 4
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0
%gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1
%load1 = load i32, ptr addrspace(5) %gep1
@@ -146,6 +448,68 @@ define void @void_func_byval_struct_i8_i32_ptr(ptr addrspace(5) byval({ i8, i32
; GFX9-FLATSCR-NEXT: scratch_load_ubyte v0, off, s32
; GFX9-FLATSCR-NEXT: scratch_load_dword v1, off, s32 offset:4
define void @void_func_byval_struct_i8_i32_ptr_value(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
+; CI-LABEL: void_func_byval_struct_i8_i32_ptr_value:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], s32
+; CI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: s_waitcnt vmcnt(1)
+; CI-NEXT: ds_write_b8 v0, v0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: ds_write_b32 v0, v1
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: void_func_byval_struct_i8_i32_ptr_value:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: buffer_load_ubyte v0, off, s[0:3], s32
+; GFX9-MU...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/150715
More information about the llvm-commits mailing list