[llvm] [AMDGPU][DAG] Remove AssertZext before some intrinsics (PR #146052)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 27 03:48:07 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Pierre van Houtryve (Pierre-vh)
<details>
<summary>Changes</summary>
---
Patch is 30.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146052.diff
11 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+18)
- (modified) llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/flat-scratch.ll (+27-27)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx12.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.iterative.ll (+10-10)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll (+5-11)
- (modified) llvm/test/CodeGen/AMDGPU/memory_clause.ll (+4-4)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3db2b3bff2d36..7a5619fb1fda1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3985,6 +3985,24 @@ SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N,
}
}
+ // AssertZext in front of these intrinsics is not necessary, the lowering of
+ // the intrinsics into a register read will insert one if it is needed.
+ if (N->getOpcode() == ISD::AssertZext &&
+ N0.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ unsigned IID = N0.getConstantOperandVal(0);
+ switch (IID) {
+ case Intrinsic::amdgcn_workitem_id_x:
+ case Intrinsic::amdgcn_workitem_id_y:
+ case Intrinsic::amdgcn_workitem_id_z:
+ case Intrinsic::amdgcn_workgroup_id_x:
+ case Intrinsic::amdgcn_workgroup_id_y:
+ case Intrinsic::amdgcn_workgroup_id_z:
+ return N0;
+ default:
+ break;
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
index 9cf9d81773037..7a542d2e9c514 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -442,9 +442,9 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 {
;
; GFX11-LABEL: add_x_shl_neg_to_sub_multi_use:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0
; GFX11-NEXT: ds_store_b32 v0, v1 offset:123
; GFX11-NEXT: ds_store_b32 v0, v1 offset:456
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index b5e579b78a59c..b25d9b245f5f6 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -714,10 +714,10 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
; GFX11-LABEL: store_load_vindex_kernel:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
-; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_mov_b32_e32 v2, 15
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_lshl_b32 s0, s0, 7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
@@ -732,9 +732,9 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
; GFX12-LABEL: store_load_vindex_kernel:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24
-; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_lshl_b32 s0, s0, 7
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
@@ -769,8 +769,8 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
; GFX942-LABEL: store_load_vindex_kernel:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
-; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX942-NEXT: v_mov_b32_e32 v1, 15
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_lshl_b32 s0, s0, 7
@@ -809,10 +809,10 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
; GFX11-PAL-LABEL: store_load_vindex_kernel:
; GFX11-PAL: ; %bb.0: ; %bb
; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-PAL-NEXT: v_mov_b32_e32 v2, 15
; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 7
; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
@@ -827,9 +827,9 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
; GFX12-PAL-LABEL: store_load_vindex_kernel:
; GFX12-PAL: ; %bb.0: ; %bb
; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX12-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX12-PAL-NEXT: s_wait_kmcnt 0x0
; GFX12-PAL-NEXT: s_lshl_b32 s0, s0, 7
; GFX12-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
@@ -1958,10 +1958,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
; GFX11-LABEL: store_load_vindex_small_offset_kernel:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
-; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: scratch_load_b32 v3, off, off glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:384 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
@@ -1976,10 +1976,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
; GFX12-LABEL: store_load_vindex_small_offset_kernel:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24
-; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:384 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
@@ -2021,8 +2021,8 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
; GFX942-NEXT: scratch_load_dword v1, off, off sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX942-NEXT: v_mov_b32_e32 v1, 15
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_lshl_b32 s0, s0, 7
@@ -2092,10 +2092,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
; GFX11-PAL-LABEL: store_load_vindex_small_offset_kernel:
; GFX11-PAL: ; %bb.0: ; %bb
; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-PAL-NEXT: scratch_load_b32 v3, off, off glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX11-PAL-NEXT: scratch_store_b32 v0, v1, off offset:384 dlc
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0)
@@ -2110,10 +2110,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
; GFX12-PAL-LABEL: store_load_vindex_small_offset_kernel:
; GFX12-PAL: ; %bb.0: ; %bb
; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX12-PAL-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
-; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX12-PAL-NEXT: s_wait_kmcnt 0x0
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, off offset:384 scope:SCOPE_SYS
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
@@ -3254,10 +3254,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
; GFX11-LABEL: store_load_vindex_large_offset_kernel:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
-; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_lshl_b32 s0, s0, 7
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -3274,10 +3274,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
; GFX12-LABEL: store_load_vindex_large_offset_kernel:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24
-; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
-; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:16512 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
@@ -3319,8 +3319,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24
; GFX942-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX942-NEXT: v_mov_b32_e32 v1, 15
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_lshl_b32 s0, s0, 7
@@ -3391,10 +3391,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
; GFX11-PAL-LABEL: store_load_vindex_large_offset_kernel:
; GFX11-PAL: ; %bb.0: ; %bb
; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-PAL-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 7
; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -3411,10 +3411,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
; GFX12-PAL-LABEL: store_load_vindex_large_offset_kernel:
; GFX12-PAL: ; %bb.0: ; %bb
; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX12-PAL-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
-; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GFX12-PAL-NEXT: s_wait_kmcnt 0x0
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, off offset:16512 scope:SCOPE_SYS
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
index 565ad295ebbb3..08c0d15432915 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
@@ -15,8 +15,8 @@ define amdgpu_kernel void @test_iglp_opt_mfma_gemm(ptr addrspace(3) noalias %in,
; GCN-LABEL: test_iglp_opt_mfma_gemm:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0
+; GCN-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
; GCN-NEXT: v_mov_b32_e32 v3, 2.0
; GCN-NEXT: ; iglp_opt mask(0x00000000)
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -153,8 +153,8 @@ define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(ptr addrspace(3) noalias
; GCN-LABEL: test_iglp_opt_rev_mfma_gemm:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0
+; GCN-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
; GCN-NEXT: v_mov_b32_e32 v2, 1.0
; GCN-NEXT: v_mov_b32_e32 v3, 2.0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -289,8 +289,8 @@ define amdgpu_kernel void @test_iglp_opt_asm_sideeffect(ptr addrspace(3) noalias
; GCN-LABEL: test_iglp_opt_asm_sideeffect:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_and_b32_e32 v0, 0xffc, v0
; GCN-NEXT: ; iglp_opt mask(0x00000000)
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_u32_e32 v1, s0, v0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll
index 6507976872410..46359f7e99059 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll
@@ -6,9 +6,9 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_cluster(ptr ad
; GCN-LABEL: test_sched_group_barrier_pipeline_WMMA_cluster:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GCN-NEXT: v_lshlrev_b32_e32 v40, 5, v0
+; GCN-NEXT: v_and_b32_e32 v40, 0x7fe0, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_nc_u32_e32 v32, s0, v40
; GCN-NEXT: v_dual_mov_b32 v81, s1 :: v_dual_add_nc_u32 v80, s1, v40
@@ -74,9 +74,9 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_cluster(ptr ad
; EXACTCUTOFF-LABEL: test_sched_group_barrier_pipeline_WMMA_cluster:
; EXACTCUTOFF: ; %bb.0: ; %entry
; EXACTCUTOFF-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; EXACTCUTOFF-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v40, 5, v0
+; EXACTCUTOFF-NEXT: v_and_b32_e32 v40, 0x7fe0, v0
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(0)
; EXACTCUTOFF-NEXT: v_add_nc_u32_e32 v32, s0, v40
; EXACTCUTOFF-NEXT: v_dual_mov_b32 v81, s1 :: v_dual_add_nc_u32 v80, s1, v40
@@ -178,9 +178,9 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_interleave(ptr
; GCN-LABEL: test_sched_group_barrier_pipeline_WMMA_interleave:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GCN-NEXT: v_lshlrev_b32_e32 v16, 5, v0
+; GCN-NEXT: v_and_b32_e32 v16, 0x7fe0, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_nc_u32_e32 v17, s0, v16
; GCN-NEXT: v_add_nc_u32_e32 v16, s1, v16
@@ -260,9 +260,9 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_interleave(ptr
; EXACTCUTOFF-LABEL: test_sched_group_barrier_pipeline_WMMA_interleave:
; EXACTCUTOFF: ; %bb.0: ; %entry
; EXACTCUTOFF-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; EXACTCUTOFF-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v16, 5, v0
+; EXACTCUTOFF-NEXT: v_and_b32_e32 v16, 0x7fe0, v0
; EXACTCUTOFF-NEXT: s_waitcnt lgkmcnt(0)
; EXACTCUTOFF-NEXT: v_add_nc_u32_e32 v17, s0, v16
; EXACTCUTOFF-NEXT: v_add_nc_u32_e32 v16, s1, v16
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx12.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx12.ll
index 02e80b62fed6e..dcc3e0df0c744 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx12.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx12.ll
@@ -8,10 +8,10 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_SWMMAC_cluster(ptr
; GCN-LABEL: test_sched_group_barrier_pipeline_SWMMAC_cluster:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GCN-NEXT: v_mov_b32_e32 v48, 0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GCN-NEXT: v_lshlrev_b32_e32 v28, 4, v0
+; GCN-NEXT: v_and_b32_e32 v28, 0x3ff0, v0
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_add_nc_u32_e32 v0, s0, v28
; GCN-NEXT: v_dual_mov_b32 v50, s1 :: v_dual_add_nc_u32 v49, s1, v28
@@ -60,10 +60,10 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_SWMMAC_cluster(ptr
; EXACTCUTOFF-LABEL: test_sched_group_barrier_pipeline_SWMMAC_cluster:
; EXACTCUTOFF: ; %bb.0: ; %entry
; EXACTCUTOFF-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; EXACTCUTOFF-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; EXACTCUTOFF-NEXT: v_mov_b32_e32 v48, 0
; EXACTCUTOFF-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; EXACTCUTOFF-NEXT: v_lshlrev_b32_e32 v28, 4, v0
+; EXACTCUTOFF-NEXT: v_and_b32_e32 v28, 0x3ff0, v0
; EXACTCUTOFF-NEXT: s_wait_kmcnt 0x0
; EXACTCUTOFF-NEXT: v_add_nc_u32_e32 v0, s0, v28
; EXACTCUTOFF-NEXT: v_dual_mov_b32 v50, s1 :: v_dual_add_nc_u32 v49, s1, v28
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.iterative.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.iterative.ll
index 371b4f070094d..0764cd5d34d75 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.iterative.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.iterative.ll
@@ -7,8 +7,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
; GCN-MINREG-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave:
; GCN-MINREG: ; %bb.0: ; %entry
; GCN-MINREG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GCN-MINREG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GCN-MINREG-NEXT: v_lshlrev_b32_e32 v0, 7, v0
+; GCN-MINREG-NEXT: v_and_b32_e32 v0, 0x1ff80, v0
; GCN-MINREG-NEXT: v_mov_b32_e32 v2, 1.0
; GCN-MINREG-NEXT: v_mov_b32_e32 v1, 2.0
; GCN-MINREG-NEXT: s_waitcnt lgkmcnt(0)
@@ -140,8 +140,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
; GCN-MAXOCC-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave:
; GCN-MAXOCC: ; %bb.0: ; %entry
; GCN-MAXOCC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GCN-MAXOCC-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GCN-MAXOCC-NEXT: v_lshlrev_b32_e32 v1, 7, v0
+; GCN-MAXOCC-NEXT: v_lshlrev_b32_e32 v0, 7, v0
+; GCN-MAXOCC-NEXT: v_and_b32_e32 v1, 0x1ff80, v0
; GCN-MAXOCC-NEXT: v_mov_b32_e32 v2, 1.0
; GCN-MAXOCC-NEXT: v_mov_b32_e32 v3, 2.0
; GCN-MAXOCC-NEXT: s_waitcnt lgkmcnt(0)
@@ -274,8 +274,8 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr
; GCN-ILP-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave:
; GCN-ILP: ; %bb.0: ; %entry
; GCN-ILP-NEXT: s_...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/146052
More information about the llvm-commits
mailing list