[llvm] [AMDGPU][GlobalISel] Add regbankselect rules for G_FSHR (PR #159818)
Anshil Gandhi via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 19 14:08:38 PDT 2025
https://github.com/gandhi56 updated https://github.com/llvm/llvm-project/pull/159818
>From 21a65ff5739db06725ed2aac983199d73de6fe85 Mon Sep 17 00:00:00 2001
From: Anshil Gandhi <Anshil.Gandhi at amd.com>
Date: Fri, 19 Sep 2025 12:06:18 -0500
Subject: [PATCH] [AMDGPU] Add regbankselect rules for G_FSHR
---
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 10 ++
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 11 ++
.../AMDGPU/GlobalISel/regbankselect-fshr.mir | 158 +++++++++++++++---
3 files changed, 158 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 73b2660727342..89287e13d4cc1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -461,6 +461,16 @@ void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
break;
}
+ case AMDGPU::G_FSHR: {
+ auto [X0, X1] = unpackAExt(MI.getOperand(1).getReg());
+ auto [Y0, Y1] = unpackAExt(MI.getOperand(2).getReg());
+ auto [S0, S1] = unpackZExt(MI.getOperand(3).getReg());
+
+ const RegisterBank *DstRB = MRI.getRegBank(MI.getOperand(0).getReg());
+ Lo = B.buildInstr(AMDGPU::G_FSHR, {{DstRB, S32}}, {X0, Y0, S0}).getReg(0);
+ Hi = B.buildInstr(AMDGPU::G_FSHR, {{DstRB, S32}}, {X1, Y1, S1}).getReg(0);
+ break;
+ }
default:
llvm_unreachable("Unpack lowering not implemented");
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 0776d14a84067..7384c407b1573 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -514,6 +514,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
+ addRulesForGOpcs({G_FSHR}, Standard)
+ .Uni(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
+ .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
+ .Uni(V2S16,
+ {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}, UnpackBitShift})
+ .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}})
+ .Uni(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
+ .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
+ .Uni(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr32}})
+ .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr32}});
+
addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});
addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir
index b1a55fe7bc42f..8e94a8c10bf80 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fshr.mir
@@ -1,6 +1,46 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: fshr_s16_sss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: fshr_s16_sss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s16) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s16) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s16) = COPY $sgpr2
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s16) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s16)
+ %0:_(s16) = COPY $sgpr0
+ %1:_(s16) = COPY $sgpr1
+ %2:_(s16) = COPY $sgpr2
+ %3:_(s16) = G_FSHR %0, %1, %2
+...
+
+---
+name: fshr_s16_vvv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: fshr_s16_vvv
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s16) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY $vgpr2
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s16) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s16)
+ %0:_(s16) = COPY $vgpr0
+ %1:_(s16) = COPY $vgpr1
+ %2:_(s16) = COPY $vgpr2
+ %3:_(s16) = G_FSHR %0, %1, %2
+...
---
name: fshr_sss
@@ -15,10 +55,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY5]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
@@ -37,9 +74,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY3]], [[COPY4]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = COPY $sgpr1
@@ -58,9 +93,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY4]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $sgpr1
@@ -79,9 +112,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY4]], [[COPY2]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $vgpr0
@@ -100,8 +131,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
- ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY3]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $sgpr0
@@ -120,8 +150,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY3]], [[COPY2]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $vgpr1
@@ -140,8 +169,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY3]], [[COPY1]], [[COPY2]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
@@ -166,3 +194,91 @@ body: |
%2:_(s32) = COPY $vgpr2
%3:_(s32) = G_FSHR %0, %1, %2
...
+
+---
+name: fshr_v2s16_sss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: fshr_v2s16_sss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr2
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $sgpr1
+ %2:_(<2 x s16>) = COPY $sgpr2
+ %3:_(<2 x s16>) = G_FSHR %0, %1, %2
+...
+
+---
+name: fshr_v2s16_vvv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: fshr_v2s16_vvv
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $vgpr1
+ %2:_(<2 x s16>) = COPY $vgpr2
+ %3:_(<2 x s16>) = G_FSHR %0, %1, %2
+...
+
+---
+name: fshr_s64_sss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: fshr_s64_sss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[COPY]](s32)
+ ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s64) = G_ZEXT [[COPY1]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:sgpr(s64) = G_FSHR [[ZEXT]], [[ZEXT1]], [[COPY2]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s64) = G_ZEXT %0
+ %4:_(s64) = G_ZEXT %1
+ %5:_(s64) = G_FSHR %3, %4, %2
+...
+
+---
+name: fshr_s64_vvv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: fshr_s64_vvv
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[COPY]](s32)
+ ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:vgpr(s64) = G_ZEXT [[COPY1]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:vgpr(s64) = G_FSHR [[ZEXT]], [[ZEXT1]], [[COPY2]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s64) = G_ZEXT %0
+ %4:_(s64) = G_ZEXT %1
+ %5:_(s64) = G_FSHR %3, %4, %2
+...
More information about the llvm-commits
mailing list