[llvm] [AMDGPU] Improve llvm.amdgcn.wave.shuffle handling for pre-GFX8 (PR #174845)

Wed Jan 7 11:22:56 PST 2026

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: None (saxlungs)

<details>
<summary>Changes</summary>

Previously, on GFX7 and earlier, GlobalISel's selection of the llvm.amdgcn.wave.shuffle intrinsic would still return true (i.e. report success) even though the required ds_bpermute_b32 instruction is not supported on those targets. With this change, GlobalISel properly reports a failure to select in that case. The tests are updated accordingly.

---
Full diff: https://github.com/llvm/llvm-project/pull/174845.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+3) 
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+4) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.shuffle.ll (+5-4) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3093b9aaf1743..15056a9a9cd65 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3915,6 +3915,9 @@ bool AMDGPUInstructionSelector::selectWaveShuffleIntrin(
   if (DstTy != LLT::scalar(32))
     return false;
 
+  if (!Subtarget->supportsBPermute())
+    return false;
+
   // If we can bpermute across the whole wave, then just do that
   if (Subtarget->supportsWaveWideBPermute()) {
     Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 2d1e54bf58831..c7cac17ddbe03 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1884,6 +1884,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return RequiresWaitsBeforeSystemScopeStores;
   }
 
+  bool supportsBPermute() const {
+    return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+  }
+
   bool supportsWaveWideBPermute() const {
     return (getGeneration() <= AMDGPUSubtarget::GFX9 ||
             getGeneration() == AMDGPUSubtarget::GFX12) ||
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.shuffle.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.shuffle.ll
index 9fc7ae205c7b1..1f259ac96be42 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.shuffle.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.shuffle.ll
@@ -20,16 +20,17 @@
 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-W64-GISEL %s
 
 ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-SDAG-ERR %s
-; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-GISEL-ERR %s
+; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-GISEL-ERR %s
 
 ; GFX6-SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.bpermute
-; GFX6-GISEL-ERR: "Invalid opcode!"
+; GFX6-GISEL-ERR: LLVM ERROR: cannot select: %10:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.shuffle), %0:vgpr(s32), %1:vgpr(s32) (in function: test_wave_shuffle_float)
 
 ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-SDAG-ERR %s
-; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-GISEL-ERR %s
+; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-GISEL-ERR %s
 
 ; GFX7-SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.bpermute
-; GFX7-GISEL-ERR: "Invalid opcode!"
+; GFX7-GISEL-ERR: LLVM ERROR: cannot select: %10:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.shuffle), %0:vgpr(s32), %1:vgpr(s32) (in function: test_wave_shuffle_float)
+
 
 
 define float @test_wave_shuffle_float(float %val, i32 %idx) {

``````````

</details>


https://github.com/llvm/llvm-project/pull/174845