[llvm] [AMDGPU][GlobalISel] Add RegBankLegalize support for G_BITREVERSE (PR #172101)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 12 14:56:00 PST 2025


https://github.com/vangthao95 created https://github.com/llvm/llvm-project/pull/172101

None

>From 537ddb9d13ca0803c92a81b16815a96ded151b82 Mon Sep 17 00:00:00 2001
From: Vang Thao <vthao at amd.com>
Date: Fri, 12 Dec 2025 14:50:48 -0800
Subject: [PATCH] [AMDGPU][GlobalISel] Add RegBankLegalize support for
 G_BITREVERSE

---
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp     |  6 +++
 .../GlobalISel/regbankselect-bitreverse.mir   |  8 +---
 llvm/test/CodeGen/AMDGPU/bitreverse.ll        | 46 +++++++++++--------
 3 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index d01afee331025..6c5f2d79e5851 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -916,6 +916,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
 
   addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
 
+  addRulesForGOpcs({G_BITREVERSE}, Standard)
+      .Uni(S32, {{Sgpr32}, {Sgpr32}})
+      .Div(S32, {{Vgpr32}, {Vgpr32}})
+      .Uni(S64, {{Sgpr64}, {Sgpr64}})
+      .Div(S64, {{Vgpr64}, {Vgpr64}});
+
   addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});
 
   addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir
index e82a492dbec20..6ae6d95ce3931 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir
@@ -1,6 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s
 
 ---
 name: bitreverse_i32_s
@@ -61,10 +60,7 @@ body: |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
-    ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:vgpr(s32) = G_BITREVERSE [[UV1]]
-    ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:vgpr(s32) = G_BITREVERSE [[UV]]
-    ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[BITREVERSE]](s32), [[BITREVERSE1]](s32)
+    ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:vgpr(s64) = G_BITREVERSE [[COPY]]
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_BITREVERSE %0
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse.ll b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
index e33b9ab0eda9e..5e5e6a6b19284 100644
--- a/llvm/test/CodeGen/AMDGPU/bitreverse.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
@@ -2,11 +2,11 @@
 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti | FileCheck %s --check-prefix=SI
 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefix=FLAT
 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global | FileCheck %s --check-prefix=FLAT
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -global-isel | FileCheck %s --check-prefix=GISEL
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -global-isel -new-reg-bank-select | FileCheck %s --check-prefix=GISEL
 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX11-FLAT,GFX11-FLAT-TRUE16
 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX11-FLAT,GFX11-FLAT-FAKE16
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 -global-isel | FileCheck %s --check-prefixes=GFX11-GISEL,GFX11-GISEL-TRUE16
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 -global-isel | FileCheck %s --check-prefixes=GFX11-GISEL,GFX11-GISEL-FAKE16
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 -global-isel -new-reg-bank-select | FileCheck %s --check-prefixes=GFX11-GISEL,GFX11-GISEL-TRUE16
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 -global-isel -new-reg-bank-select | FileCheck %s --check-prefixes=GFX11-GISEL,GFX11-GISEL-FAKE16
 
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 
@@ -151,9 +151,11 @@ define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspa
 ; GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GISEL-NEXT:    flat_load_ushort v0, v[0:1]
 ; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    v_bfrev_b32_e32 v0, v0
-; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GISEL-NEXT:    v_readfirstlane_b32 s2, v0
+; GISEL-NEXT:    s_brev_b32 s2, s2
+; GISEL-NEXT:    s_lshr_b32 s2, s2, 16
 ; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    v_mov_b32_e32 v2, s2
 ; GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GISEL-NEXT:    flat_store_short v[0:1], v2
 ; GISEL-NEXT:    s_endpgm
@@ -176,14 +178,16 @@ define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspa
 ; GFX11-GISEL-TRUE16-LABEL: v_brev_i16:
 ; GFX11-GISEL-TRUE16:       ; %bb.0:
 ; GFX11-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-TRUE16-NEXT:    global_load_u16 v1, v0, s[2:3]
+; GFX11-GISEL-TRUE16-NEXT:    global_load_u16 v0, v1, s[2:3]
 ; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-TRUE16-NEXT:    v_bfrev_b32_e32 v1, v1
-; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-GISEL-TRUE16-NEXT:    global_store_b16 v0, v1, s[0:1]
+; GFX11-GISEL-TRUE16-NEXT:    v_readfirstlane_b32 s2, v0
+; GFX11-GISEL-TRUE16-NEXT:    s_brev_b32 s2, s2
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT:    s_lshr_b32 s2, s2, 16
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s2
+; GFX11-GISEL-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; GFX11-GISEL-TRUE16-NEXT:    s_endpgm
 ;
 ; GFX11-GISEL-FAKE16-LABEL: v_brev_i16:
@@ -193,8 +197,12 @@ define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspa
 ; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3]
 ; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-FAKE16-NEXT:    v_bfrev_b32_e32 v1, v1
-; GFX11-GISEL-FAKE16-NEXT:    global_store_d16_hi_b16 v0, v1, s[0:1]
+; GFX11-GISEL-FAKE16-NEXT:    v_readfirstlane_b32 s2, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_brev_b32 s2, s2
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT:    s_lshr_b32 s2, s2, 16
+; GFX11-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v1, s2
+; GFX11-GISEL-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
   %val = load i16, ptr addrspace(1) %valptr
   %brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
@@ -641,8 +649,8 @@ define amdgpu_kernel void @v_brev_i64(ptr addrspace(1) noalias %out, ptr addrspa
 ; GISEL-NEXT:    v_mov_b32_e32 v4, s1
 ; GISEL-NEXT:    v_mov_b32_e32 v3, s0
 ; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    v_bfrev_b32_e32 v1, v1
 ; GISEL-NEXT:    v_bfrev_b32_e32 v2, v0
+; GISEL-NEXT:    v_bfrev_b32_e32 v1, v1
 ; GISEL-NEXT:    flat_store_dwordx2 v[3:4], v[1:2]
 ; GISEL-NEXT:    s_endpgm
 ;
@@ -671,8 +679,8 @@ define amdgpu_kernel void @v_brev_i64(ptr addrspace(1) noalias %out, ptr addrspa
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    global_load_b64 v[0:1], v0, s[2:3]
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT:    v_bfrev_b32_e32 v1, v1
 ; GFX11-GISEL-NEXT:    v_bfrev_b32_e32 v2, v0
+; GFX11-GISEL-NEXT:    v_bfrev_b32_e32 v1, v1
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-GISEL-NEXT:    global_store_b64 v0, v[1:2], s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
@@ -819,11 +827,11 @@ define amdgpu_kernel void @v_brev_v2i64(ptr addrspace(1) noalias %out, ptr addrs
 ; GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GISEL-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
 ; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    v_bfrev_b32_e32 v4, v1
 ; GISEL-NEXT:    v_bfrev_b32_e32 v5, v0
+; GISEL-NEXT:    v_bfrev_b32_e32 v4, v1
 ; GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GISEL-NEXT:    v_bfrev_b32_e32 v6, v3
 ; GISEL-NEXT:    v_bfrev_b32_e32 v7, v2
+; GISEL-NEXT:    v_bfrev_b32_e32 v6, v3
 ; GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GISEL-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
 ; GISEL-NEXT:    s_endpgm
@@ -855,10 +863,10 @@ define amdgpu_kernel void @v_brev_v2i64(ptr addrspace(1) noalias %out, ptr addrs
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    global_load_b128 v[0:3], v0, s[2:3]
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT:    v_bfrev_b32_e32 v4, v1
 ; GFX11-GISEL-NEXT:    v_bfrev_b32_e32 v5, v0
-; GFX11-GISEL-NEXT:    v_bfrev_b32_e32 v6, v3
+; GFX11-GISEL-NEXT:    v_bfrev_b32_e32 v4, v1
 ; GFX11-GISEL-NEXT:    v_bfrev_b32_e32 v7, v2
+; GFX11-GISEL-NEXT:    v_bfrev_b32_e32 v6, v3
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-GISEL-NEXT:    global_store_b128 v0, v[4:7], s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm



More information about the llvm-commits mailing list