[llvm] ad0acf4 - AMDGPU/GlobalISel: Combine S16 copy-trunc-readanylane-anyext (#168410)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 24 06:57:11 PST 2025
Author: Petar Avramovic
Date: 2025-11-24T15:57:07+01:00
New Revision: ad0acf4af001a3781b41b572788adcd7d652d18a
URL: https://github.com/llvm/llvm-project/commit/ad0acf4af001a3781b41b572788adcd7d652d18a
DIFF: https://github.com/llvm/llvm-project/commit/ad0acf4af001a3781b41b572788adcd7d652d18a.diff
LOG: AMDGPU/GlobalISel: Combine S16 copy-trunc-readanylane-anyext (#168410)
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 907f8300de6d2..396d64625fb5c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -173,6 +173,14 @@ Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) {
if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc))))
return RALSrc;
+ // RALSrc = G_ANYEXT S16Src
+ // TruncSrc = G_AMDGPU_READANYLANE RALSrc
+ // Src = G_TRUNC TruncSrc
+ if (mi_match(Src, MRI,
+ m_GTrunc(m_GAMDGPUReadAnyLane(m_GAnyExt(m_Reg(RALSrc)))))) {
+ return RALSrc;
+ }
+
// TruncSrc = G_AMDGPU_READANYLANE RALSrc
// AextSrc = G_TRUNC TruncSrc
// Src = G_ANYEXT AextSrc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
index 4361e5c113708..27005e7aa175e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
@@ -1070,9 +1070,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
; GFX11-True16-NEXT: v_mov_b32_e32 v1, s0
; GFX11-True16-NEXT: ds_load_u16_d16 v1, v1
; GFX11-True16-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-True16-NEXT: v_readfirstlane_b32 s0, v1
-; GFX11-True16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-True16-NEXT: v_mov_b16_e32 v1.l, s0
; GFX11-True16-NEXT: ds_store_b16 v0, v1
; GFX11-True16-NEXT: s_endpgm
;
@@ -1089,10 +1086,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
; GFX12-True16-NEXT: v_mov_b32_e32 v1, s0
; GFX12-True16-NEXT: ds_load_u16_d16 v1, v1
; GFX12-True16-NEXT: s_wait_dscnt 0x0
-; GFX12-True16-NEXT: v_readfirstlane_b32 s0, v1
-; GFX12-True16-NEXT: s_wait_alu 0xf1ff
-; GFX12-True16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-True16-NEXT: v_mov_b16_e32 v1.l, s0
; GFX12-True16-NEXT: ds_store_b16 v0, v1
; GFX12-True16-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
index bf36deac33380..9bf140cf744db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
@@ -13,9 +13,6 @@ define amdgpu_ps void @load_uniform_P1_i16_gfx12(ptr addrspace(1) inreg %ptra, p
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-NEXT: s_endpgm
;
@@ -312,9 +309,6 @@ define amdgpu_ps void @load_uniform_P4_i16_gfx12(ptr addrspace(4) inreg %ptra, p
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-NEXT: s_endpgm
;
More information about the llvm-commits mailing list