[llvm-branch-commits] [llvm] [AMDGPU][GlobalISel] Add RegBankLegalize support for G_AMDGPU_S_MUL_* (PR #175888)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jan 13 21:40:58 PST 2026
https://github.com/vangthao95 created https://github.com/llvm/llvm-project/pull/175888
This is patch 3 of 4 implementing full G_MUL support in RegBankLegalize.
The current mul.ll test is only partially updated and is expected to fail;
it will be fully updated in the fourth patch.
>From fcdae3795584468953f6b9ea54f58db24dd24c2d Mon Sep 17 00:00:00 2001
From: Vang Thao <vang.thao at amd.com>
Date: Tue, 13 Jan 2026 20:36:55 -0800
Subject: [PATCH] [AMDGPU][GlobalISel] Add RegBankLegalize support for
G_AMDGPU_S_MUL_*
Patch 3 of 4 patches to implement full G_MUL support in regbanklegalize.
Current mul.ll test is only partially updated and expected to fail.
It will be updated in the fourth patch.
---
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 19 ++++
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 4 +
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 2 +
.../AMDGPU/GlobalISel/regbankselect-smul.mir | 92 +++++++++++++++++++
4 files changed, 117 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smul.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 8cea1fa98cd02..1a8bd6d8de261 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -985,6 +985,25 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI,
return lowerS_BFE(MI);
case UniMAD64:
return lowerUniMAD64(MI);
+ case S_Mul64: {
+ B.buildMul(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2));
+ MI.eraseFromParent();
+ return true;
+ }
+ case S_Mul64Div: {
+ auto Op1 = B.buildTrunc(VgprRB_S32, MI.getOperand(1));
+ auto Op2 = B.buildTrunc(VgprRB_S32, MI.getOperand(2));
+ auto Zero = B.buildConstant({VgprRB, S64}, 0);
+
+ unsigned NewOpc = MI.getOpcode() == AMDGPU::G_AMDGPU_S_MUL_U64_U32
+ ? AMDGPU::G_AMDGPU_MAD_U64_U32
+ : AMDGPU::G_AMDGPU_MAD_I64_I32;
+
+ B.buildInstr(NewOpc, {MI.getOperand(0).getReg(), {SgprRB, S32}},
+ {Op1, Op2, Zero});
+ MI.eraseFromParent();
+ return true;
+ }
case SplitTo32:
return lowerSplitTo32(MI);
case SplitTo32Select:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 991a85b670a76..5a03f6b5463ad 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -505,6 +505,10 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}})
.Uni(S64, {{Sgpr64, SgprS1}, {Sgpr32, Sgpr32, Sgpr64}, UniMAD64});
+ addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, Standard)
+ .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}, S_Mul64})
+ .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, S_Mul64Div});
+
addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
.Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
.Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 77ed0b7fe7920..b5fd6683d319b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -226,6 +226,8 @@ enum LoweringMethodID {
V_BFE,
VgprToVccCopy,
UniMAD64,
+ S_Mul64,
+ S_Mul64Div,
SplitTo32,
ScalarizeToS16,
SplitTo32Select,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smul.mir
new file mode 100644
index 0000000000000..ffec314968f85
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smul.mir
@@ -0,0 +1,92 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck %s
+
+---
+name: s_mul_u64_u32_ss
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: s_mul_u64_u32_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:sgpr(s64) = G_MUL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MUL]](s64)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_AMDGPU_S_MUL_U64_U32 %0, %1
+ $vgpr0_vgpr1 = COPY %2
+ S_ENDPGM 0, implicit $vgpr0_vgpr1
+...
+
+---
+name: s_mul_u64_u32_vv
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: s_mul_u64_u32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s32) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_MAD_U64_U32 [[TRUNC]](s32), [[TRUNC1]], [[C]]
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AMDGPU_MAD_U64_U32_]](s64)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_AMDGPU_S_MUL_U64_U32 %0, %1
+ $vgpr0_vgpr1 = COPY %2
+ S_ENDPGM 0, implicit $vgpr0_vgpr1
+...
+
+---
+name: s_mul_i64_i32_ss
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: s_mul_i64_i32_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:sgpr(s64) = G_MUL [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MUL]](s64)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_AMDGPU_S_MUL_I64_I32 %0, %1
+ $vgpr0_vgpr1 = COPY %2
+ S_ENDPGM 0, implicit $vgpr0_vgpr1
+...
+
+---
+name: s_mul_i64_i32_vv
+legalized: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: s_mul_i64_i32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s32) = G_TRUNC [[COPY1]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_MAD_I64_I32 [[TRUNC]](s32), [[TRUNC1]], [[C]]
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AMDGPU_MAD_I64_I32_]](s64)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_AMDGPU_S_MUL_I64_I32 %0, %1
+ $vgpr0_vgpr1 = COPY %2
+ S_ENDPGM 0, implicit $vgpr0_vgpr1
+...
More information about the llvm-branch-commits
mailing list