[llvm] AMDGPU/GlobalISel: Combine S16 copy-trunc-readanylane-anyext (PR #168410)

Petar Avramovic via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 18 05:59:23 PST 2025


https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/168410

>From 9e70882cad3857c0dfbc5285d567b921ef0280e7 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Mon, 17 Nov 2025 18:42:52 +0100
Subject: [PATCH] AMDGPU/GlobalISel: Combine S16 copy-trunc-readanylane-anyext

---
 llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp          | 8 ++++++++
 .../CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll     | 7 -------
 llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll       | 6 ------
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 907f8300de6d2..396d64625fb5c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -173,6 +173,14 @@ Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) {
   if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc))))
     return RALSrc;
 
+  // AextDst = G_ANYEXT RALSrc (RALSrc is the S16 source that gets returned)
+  // TruncSrc = G_AMDGPU_READANYLANE AextDst
+  // Src = G_TRUNC TruncSrc
+  if (mi_match(Src, MRI,
+               m_GTrunc(m_GAMDGPUReadAnyLane(m_GAnyExt(m_Reg(RALSrc)))))) {
+    return RALSrc;
+  }
+
   // TruncSrc = G_AMDGPU_READANYLANE RALSrc
   // AextSrc = G_TRUNC TruncSrc
   // Src = G_ANYEXT AextSrc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
index 4361e5c113708..27005e7aa175e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
@@ -1070,9 +1070,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
 ; GFX11-True16-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX11-True16-NEXT:    ds_load_u16_d16 v1, v1
 ; GFX11-True16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-True16-NEXT:    v_readfirstlane_b32 s0, v1
-; GFX11-True16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-True16-NEXT:    v_mov_b16_e32 v1.l, s0
 ; GFX11-True16-NEXT:    ds_store_b16 v0, v1
 ; GFX11-True16-NEXT:    s_endpgm
 ;
@@ -1089,10 +1086,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
 ; GFX12-True16-NEXT:    v_mov_b32_e32 v1, s0
 ; GFX12-True16-NEXT:    ds_load_u16_d16 v1, v1
 ; GFX12-True16-NEXT:    s_wait_dscnt 0x0
-; GFX12-True16-NEXT:    v_readfirstlane_b32 s0, v1
-; GFX12-True16-NEXT:    s_wait_alu 0xf1ff
-; GFX12-True16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-True16-NEXT:    v_mov_b16_e32 v1.l, s0
 ; GFX12-True16-NEXT:    ds_store_b16 v0, v1
 ; GFX12-True16-NEXT:    s_endpgm
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
index bf36deac33380..9bf140cf744db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
@@ -13,9 +13,6 @@ define amdgpu_ps void @load_uniform_P1_i16_gfx12(ptr addrspace(1) inreg %ptra, p
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    global_load_d16_b16 v2, v2, s[0:1]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX11-NEXT:    s_endpgm
 ;
@@ -312,9 +309,6 @@ define amdgpu_ps void @load_uniform_P4_i16_gfx12(ptr addrspace(4) inreg %ptra, p
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    global_load_d16_b16 v2, v2, s[0:1]
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX11-NEXT:    s_endpgm
 ;



More information about the llvm-commits mailing list