[llvm-branch-commits] [llvm] [AMDGPU][GlobalISel] Add RegBankLegalize rules for SMED3 and CVT_PK_I16_I32 (PR #176596)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Jan 17 14:10:33 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: None (vangthao95)
<details>
<summary>Changes</summary>
These opcodes are created together for the i64->i16 signed clamp pattern.
---
Full diff: https://github.com/llvm/llvm-project/pull/176596.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp (+8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll (+4-4)
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-cvt-pk-i16-i32.mir (+41)
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-smed3.mir (+46)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index ce80a94f29222..def076525c470 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1060,6 +1060,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
+ addRulesForGOpcs({G_AMDGPU_SMED3}, Standard)
+ .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
+ .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
+
+ addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32}, Standard)
+ .Uni(V2S16, {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
+ .Div(V2S16, {{VgprV2S16}, {Vgpr32, Vgpr32}});
+
// FNEG and FABS are either folded as source modifiers or can be selected as
// bitwise XOR and AND with Mask. XOR and AND are available on SALU but for
// targets without SALU float we still select them as VGPR since there would
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
index 7db49bca36062..d356153fe7360 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -global-isel -mcpu=tahiti -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefixes=GCN,GFX678 %s
-; RUN: llc -global-isel -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefixes=GCN,GFX9 %s
-; RUN: llc -global-isel -mcpu=gfx1010 -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN,GFX10 %s
-; RUN: llc -global-isel -mcpu=gfx1100 -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN,GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mcpu=tahiti -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefixes=GCN,GFX678 %s
+; RUN: llc -global-isel -new-reg-bank-select -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefixes=GCN,GFX9 %s
+; RUN: llc -global-isel -new-reg-bank-select -mcpu=gfx1010 -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN,GFX10 %s
+; RUN: llc -global-isel -new-reg-bank-select -mcpu=gfx1100 -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN,GFX11 %s
declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-cvt-pk-i16-i32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-cvt-pk-i16-i32.mir
new file mode 100644
index 0000000000000..656f5a2fc1e05
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-cvt-pk-i16-i32.mir
@@ -0,0 +1,41 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck %s
+
+---
+name: cvt_pk_i16_i32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: cvt_pk_i16_i32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_CVT_PK_I16_I32_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_CVT_PK_I16_I32 [[COPY2]], [[COPY3]]
+ ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[AMDGPU_CVT_PK_I16_I32_]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(<2 x s16>) = G_AMDGPU_CVT_PK_I16_I32 %0, %1
+...
+
+---
+name: cvt_pk_i16_i32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: cvt_pk_i16_i32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[AMDGPU_CVT_PK_I16_I32_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_CVT_PK_I16_I32 [[COPY]], [[COPY1]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(<2 x s16>) = G_AMDGPU_CVT_PK_I16_I32 %0, %1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-smed3.mir
new file mode 100644
index 0000000000000..a29d0bc9063e8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-smed3.mir
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck %s
+
+---
+name: smed3_sss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: smed3_sss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY3]], [[COPY4]], [[COPY5]]
+ ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[AMDGPU_SMED3_]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s32) = G_AMDGPU_SMED3 %0, %1, %2
+...
+
+---
+name: smed3_vvv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: smed3_vvv
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s32) = G_AMDGPU_SMED3 %0, %1, %2
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/176596
More information about the llvm-branch-commits
mailing list