[llvm] [AMDGPU][GlobalISel] Add RegBankLegalize support for G_BITREVERSE (PR #172101)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 12 14:56:00 PST 2025
https://github.com/vangthao95 created https://github.com/llvm/llvm-project/pull/172101
(No description was provided for this pull request.)
>From 537ddb9d13ca0803c92a81b16815a96ded151b82 Mon Sep 17 00:00:00 2001
From: Vang Thao <vthao at amd.com>
Date: Fri, 12 Dec 2025 14:50:48 -0800
Subject: [PATCH] [AMDGPU][GlobalISel] Add RegBankLegalize support for G_BITREVERSE
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 6 +++
.../GlobalISel/regbankselect-bitreverse.mir | 8 +---
llvm/test/CodeGen/AMDGPU/bitreverse.ll | 46 +++++++++++--------
3 files changed, 35 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index d01afee331025..6c5f2d79e5851 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -916,6 +916,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
+ addRulesForGOpcs({G_BITREVERSE}, Standard)
+ .Uni(S32, {{Sgpr32}, {Sgpr32}})
+ .Div(S32, {{Vgpr32}, {Vgpr32}})
+ .Uni(S64, {{Sgpr64}, {Sgpr64}})
+ .Div(S64, {{Vgpr64}, {Vgpr64}});
+
addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});
addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir
index e82a492dbec20..6ae6d95ce3931 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitreverse.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s
---
name: bitreverse_i32_s
@@ -61,10 +60,7 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
- ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:vgpr(s32) = G_BITREVERSE [[UV1]]
- ; CHECK-NEXT: [[BITREVERSE1:%[0-9]+]]:vgpr(s32) = G_BITREVERSE [[UV]]
- ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[BITREVERSE]](s32), [[BITREVERSE1]](s32)
+ ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:vgpr(s64) = G_BITREVERSE [[COPY]]
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = G_BITREVERSE %0
...
diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse.ll b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
index e33b9ab0eda9e..5e5e6a6b19284 100644
--- a/llvm/test/CodeGen/AMDGPU/bitreverse.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
@@ -2,11 +2,11 @@
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti | FileCheck %s --check-prefix=SI
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefix=FLAT
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global | FileCheck %s --check-prefix=FLAT
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -global-isel | FileCheck %s --check-prefix=GISEL
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -global-isel -new-reg-bank-select | FileCheck %s --check-prefix=GISEL
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX11-FLAT,GFX11-FLAT-TRUE16
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX11-FLAT,GFX11-FLAT-FAKE16
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 -global-isel | FileCheck %s --check-prefixes=GFX11-GISEL,GFX11-GISEL-TRUE16
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 -global-isel | FileCheck %s --check-prefixes=GFX11-GISEL,GFX11-GISEL-FAKE16
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 -global-isel -new-reg-bank-select | FileCheck %s --check-prefixes=GFX11-GISEL,GFX11-GISEL-TRUE16
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 -global-isel -new-reg-bank-select | FileCheck %s --check-prefixes=GFX11-GISEL,GFX11-GISEL-FAKE16
declare i32 @llvm.amdgcn.workitem.id.x() #1
@@ -151,9 +151,11 @@ define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspa
; GISEL-NEXT: v_mov_b32_e32 v1, s3
; GISEL-NEXT: flat_load_ushort v0, v[0:1]
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: v_bfrev_b32_e32 v0, v0
-; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GISEL-NEXT: v_readfirstlane_b32 s2, v0
+; GISEL-NEXT: s_brev_b32 s2, s2
+; GISEL-NEXT: s_lshr_b32 s2, s2, 16
; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: v_mov_b32_e32 v2, s2
; GISEL-NEXT: v_mov_b32_e32 v1, s1
; GISEL-NEXT: flat_store_short v[0:1], v2
; GISEL-NEXT: s_endpgm
@@ -176,14 +178,16 @@ define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspa
; GFX11-GISEL-TRUE16-LABEL: v_brev_i16:
; GFX11-GISEL-TRUE16: ; %bb.0:
; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3]
+; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3]
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-TRUE16-NEXT: v_bfrev_b32_e32 v1, v1
-; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX11-GISEL-TRUE16-NEXT: v_readfirstlane_b32 s2, v0
+; GFX11-GISEL-TRUE16-NEXT: s_brev_b32 s2, s2
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2
+; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-GISEL-TRUE16-NEXT: s_endpgm
;
; GFX11-GISEL-FAKE16-LABEL: v_brev_i16:
@@ -193,8 +197,12 @@ define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspa
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3]
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-FAKE16-NEXT: v_bfrev_b32_e32 v1, v1
-; GFX11-GISEL-FAKE16-NEXT: global_store_d16_hi_b16 v0, v1, s[0:1]
+; GFX11-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s2, v1
+; GFX11-GISEL-FAKE16-NEXT: s_brev_b32 s2, s2
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, s2
+; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
%val = load i16, ptr addrspace(1) %valptr
%brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
@@ -641,8 +649,8 @@ define amdgpu_kernel void @v_brev_i64(ptr addrspace(1) noalias %out, ptr addrspa
; GISEL-NEXT: v_mov_b32_e32 v4, s1
; GISEL-NEXT: v_mov_b32_e32 v3, s0
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: v_bfrev_b32_e32 v1, v1
; GISEL-NEXT: v_bfrev_b32_e32 v2, v0
+; GISEL-NEXT: v_bfrev_b32_e32 v1, v1
; GISEL-NEXT: flat_store_dwordx2 v[3:4], v[1:2]
; GISEL-NEXT: s_endpgm
;
@@ -671,8 +679,8 @@ define amdgpu_kernel void @v_brev_i64(ptr addrspace(1) noalias %out, ptr addrspa
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v0, s[2:3]
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v1, v1
; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v2, v0
+; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v1, v1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX11-GISEL-NEXT: global_store_b64 v0, v[1:2], s[0:1]
; GFX11-GISEL-NEXT: s_endpgm
@@ -819,11 +827,11 @@ define amdgpu_kernel void @v_brev_v2i64(ptr addrspace(1) noalias %out, ptr addrs
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GISEL-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: v_bfrev_b32_e32 v4, v1
; GISEL-NEXT: v_bfrev_b32_e32 v5, v0
+; GISEL-NEXT: v_bfrev_b32_e32 v4, v1
; GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GISEL-NEXT: v_bfrev_b32_e32 v6, v3
; GISEL-NEXT: v_bfrev_b32_e32 v7, v2
+; GISEL-NEXT: v_bfrev_b32_e32 v6, v3
; GISEL-NEXT: v_mov_b32_e32 v1, s1
; GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
; GISEL-NEXT: s_endpgm
@@ -855,10 +863,10 @@ define amdgpu_kernel void @v_brev_v2i64(ptr addrspace(1) noalias %out, ptr addrs
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_load_b128 v[0:3], v0, s[2:3]
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v4, v1
; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v5, v0
-; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v6, v3
+; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v4, v1
; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v7, v2
+; GFX11-GISEL-NEXT: v_bfrev_b32_e32 v6, v3
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX11-GISEL-NEXT: global_store_b128 v0, v[4:7], s[0:1]
; GFX11-GISEL-NEXT: s_endpgm
More information about the llvm-commits mailing list