[llvm] [AMDGPU][GlobalISel] Add regbankselect rules for G_FSHR (PR #159818)
Anshil Gandhi via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 31 10:16:45 PDT 2025
https://github.com/gandhi56 updated https://github.com/llvm/llvm-project/pull/159818
>From ddec0998c977f42b74ec92657cd4c7e986a4633a Mon Sep 17 00:00:00 2001
From: Anshil Gandhi <Anshil.Gandhi at amd.com>
Date: Fri, 19 Sep 2025 12:06:18 -0500
Subject: [PATCH] [AMDGPU] Add regbankselect rules for G_FSHR
---
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 10 +
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 11 ++
.../AMDGPU/GlobalISel/regbankselect-fshr.mir | 185 +++++++++++++++---
3 files changed, 184 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index b84c30ecaac0b..3790c17d98a7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -461,6 +461,16 @@ void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
break;
}
+ case AMDGPU::G_FSHR: {
+ auto [X0, X1] = unpackAExt(MI.getOperand(1).getReg());
+ auto [Y0, Y1] = unpackAExt(MI.getOperand(2).getReg());
+ auto [S0, S1] = unpackZExt(MI.getOperand(3).getReg());
+
+ const RegisterBank *DstRB = MRI.getRegBank(MI.getOperand(0).getReg());
+ Lo = B.buildInstr(AMDGPU::G_FSHR, {{DstRB, S32}}, {X0, Y0, S0}).getReg(0);
+ Hi = B.buildInstr(AMDGPU::G_FSHR, {{DstRB, S32}}, {X1, Y1, S1}).getReg(0);
+ break;
+ }
default:
llvm_unreachable("Unpack lowering not implemented");
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 01abd358ff595..5b06d54cae32f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -526,6 +526,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
+ addRulesForGOpcs({G_FSHR}, Standard)
+ .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt, Sgpr32ZExt}})
+ .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
+ .Uni(V2S16,
+ {{SgprV2S16}, {SgprV2S16, SgprV2S16, SgprV2S16}, UnpackBitShift})
+ .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}})
+ .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}})
+ .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
+ .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64, Sgpr32}})
+ .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr32}});
+
addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});
addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir
index b1a55fe7bc42f..5685b4362e1ae 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir
@@ -1,37 +1,77 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize -verify-machineinstrs -o - %s | FileCheck %s
---
-name: fshr_sss
+name: fshr_s16_sss
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2
- ; CHECK-LABEL: name: fshr_sss
+ ; CHECK-LABEL: name: fshr_s16_sss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s16) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s16) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s16) = COPY $sgpr2
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[COPY]](s16)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[COPY1]](s16)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[COPY2]](s16)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s32) = G_FSHR [[ANYEXT]], [[ANYEXT1]], [[ZEXT]](s32)
+ %0:_(s16) = COPY $sgpr0
+ %1:_(s16) = COPY $sgpr1
+ %2:_(s16) = COPY $sgpr2
+ %3:_(s16) = G_FSHR %0, %1, %2
+...
+
+---
+name: fshr_s16_vvv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: fshr_s16_vvv
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s16) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY $vgpr2
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s16) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s16)
+ %0:_(s16) = COPY $vgpr0
+ %1:_(s16) = COPY $vgpr1
+ %2:_(s16) = COPY $vgpr2
+ %3:_(s16) = G_FSHR %0, %1, %2
+...
+
+---
+name: fshr_s32_sss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: fshr_s32_sss
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY5]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = G_FSHR %0, %1, %2
...
+
---
-name: fshr_vss
+name: fshr_s32_vss
legalized: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0, $sgpr1
- ; CHECK-LABEL: name: fshr_vss
+ ; CHECK-LABEL: name: fshr_s32_vss
; CHECK: liveins: $vgpr0, $sgpr0, $sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
@@ -46,13 +86,13 @@ body: |
%3:_(s32) = G_FSHR %0, %1, %2
...
---
-name: fshr_svs
+name: fshr_s32_svs
legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $sgpr1
- ; CHECK-LABEL: name: fshr_svs
+ ; CHECK-LABEL: name: fshr_s32_svs
; CHECK: liveins: $sgpr0, $vgpr0, $sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
@@ -67,13 +107,13 @@ body: |
%3:_(s32) = G_FSHR %0, %1, %2
...
---
-name: fshr_ssv
+name: fshr_s32_ssv
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0
- ; CHECK-LABEL: name: fshr_ssv
+ ; CHECK-LABEL: name: fshr_s32_ssv
; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
@@ -88,13 +128,13 @@ body: |
%3:_(s32) = G_FSHR %0, %1, %2
...
---
-name: fshr_vvs
+name: fshr_s32_vvs
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr0
- ; CHECK-LABEL: name: fshr_vvs
+ ; CHECK-LABEL: name: fshr_s32_vvs
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
@@ -108,13 +148,13 @@ body: |
%3:_(s32) = G_FSHR %0, %1, %2
...
---
-name: fshr_vsv
+name: fshr_s32_vsv
legalized: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0, $vgpr1
- ; CHECK-LABEL: name: fshr_vsv
+ ; CHECK-LABEL: name: fshr_s32_vsv
; CHECK: liveins: $vgpr0, $sgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
@@ -128,13 +168,13 @@ body: |
%3:_(s32) = G_FSHR %0, %1, %2
...
---
-name: fshr_svv
+name: fshr_s32_svv
legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
- ; CHECK-LABEL: name: fshr_svv
+ ; CHECK-LABEL: name: fshr_s32_svv
; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
@@ -148,13 +188,13 @@ body: |
%3:_(s32) = G_FSHR %0, %1, %2
...
---
-name: fshr_vvv
+name: fshr_s32_vvv
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
- ; CHECK-LABEL: name: fshr_vvv
+ ; CHECK-LABEL: name: fshr_s32_vvv
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
@@ -166,3 +206,104 @@ body: |
%2:_(s32) = COPY $vgpr2
%3:_(s32) = G_FSHR %0, %1, %2
...
+
+---
+name: fshr_v2s16_sss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: fshr_v2s16_sss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr2
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY2]](<2 x s16>)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST2]], [[C1]]
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s32) = G_FSHR [[BITCAST]], [[BITCAST1]], [[AND]](s32)
+ ; CHECK-NEXT: [[FSHR1:%[0-9]+]]:sgpr(s32) = G_FSHR [[LSHR]], [[LSHR1]], [[LSHR2]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[FSHR]](s32), [[FSHR1]](s32)
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $sgpr1
+ %2:_(<2 x s16>) = COPY $sgpr2
+ %3:_(<2 x s16>) = G_FSHR %0, %1, %2
+...
+
+---
+name: fshr_v2s16_vvv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: fshr_v2s16_vvv
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $vgpr1
+ %2:_(<2 x s16>) = COPY $vgpr2
+ %3:_(<2 x s16>) = G_FSHR %0, %1, %2
+...
+
+---
+name: fshr_s64_sss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: fshr_s64_sss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s64) = G_FSHR [[MV]], [[MV1]], [[COPY2]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s64) = G_ZEXT %0
+ %4:_(s64) = G_ZEXT %1
+ %5:_(s64) = G_FSHR %3, %4, %2
+...
+
+---
+name: fshr_s64_vvv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: fshr_s64_vvv
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s64) = G_FSHR [[MV]], [[MV1]], [[COPY2]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s64) = G_ZEXT %0
+ %4:_(s64) = G_ZEXT %1
+ %5:_(s64) = G_FSHR %3, %4, %2
+...
More information about the llvm-commits
mailing list