[llvm-branch-commits] [llvm] [AMDGPU][GlobalISel] Add RegBankLegalize support for G_AMDGPU_MAD_* (PR #175887)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jan 13 21:43:19 PST 2026
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: None (vangthao95)
This is patch 2 of 4 implementing full G_MUL support in RegBankLegalize.
The mul.ll test is only partially updated here and is currently expected to fail;
it will be fully updated in the fourth patch.
---
Patch is 30.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/175887.diff
5 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp (+131)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h (+1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp (+4)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h (+2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir (+47-162)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index f21b87c8f92f0..8cea1fa98cd02 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -19,6 +19,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
@@ -716,6 +717,131 @@ bool RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) {
return true;
}
+bool RegBankLegalizeHelper::lowerUniMAD64(MachineInstr &MI) {
+ Register Dst0 = MI.getOperand(0).getReg();
+ Register Dst1 = MI.getOperand(1).getReg();
+ Register Src0 = MI.getOperand(2).getReg();
+ Register Src1 = MI.getOperand(3).getReg();
+ Register Src2 = MI.getOperand(4).getReg();
+
+ bool IsUnsigned = MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
+
+ bool DstOnValu = MRI.getRegBankOrNull(Src2) == VgprRB;
+ bool Accumulate = true;
+
+ if (!DstOnValu) {
+ if (mi_match(Src2, MRI, MIPatternMatch::m_ZeroInt()))
+ Accumulate = false;
+ }
+
+ // Keep the multiplication on the SALU.
+ Register DstHi;
+ Register DstLo = B.buildMul({SgprRB, S32}, Src0, Src1).getReg(0);
+ bool MulHiInVgpr = false;
+
+ const GCNSubtarget &ST = B.getMF().getSubtarget<GCNSubtarget>();
+ unsigned MulHOpc = IsUnsigned ? AMDGPU::G_UMULH : AMDGPU::G_SMULH;
+
+ if (ST.hasScalarMulHiInsts()) {
+ DstHi = B.buildInstr(MulHOpc, {{SgprRB, S32}}, {Src0, Src1}).getReg(0);
+ } else {
+ Register VSrc0 = B.buildCopy({VgprRB, S32}, Src0).getReg(0);
+ Register VSrc1 = B.buildCopy({VgprRB, S32}, Src1).getReg(0);
+
+ DstHi = B.buildInstr(MulHOpc, {{VgprRB, S32}}, {VSrc0, VSrc1}).getReg(0);
+
+ if (!DstOnValu) {
+ Register DstHiSgpr =
+ MRI.createVirtualRegister({SgprRB, MRI.getType(DstHi)});
+ buildReadAnyLane(B, DstHiSgpr, DstHi, RBI);
+ DstHi = DstHiSgpr;
+ } else {
+ MulHiInVgpr = true;
+ }
+ }
+
+ // Accumulate and produce the "carry-out" bit.
+
+ // The "carry-out" is defined as bit 64 of the result when computed as a
+ // big integer. For unsigned multiply-add, this matches the usual
+ // definition of carry-out. For signed multiply-add, bit 64 is the sign
+ // bit of the result, which is computed as the mod-2 sum (i.e. XOR):
+ // sign(Src0 * Src1) ^ sign(Src2) ^ carry-out from the unsigned 64-bit add
+ LLT CarryType = DstOnValu ? S1 : S32;
+ const RegisterBank &CarryBank = DstOnValu ? *VccRB : *SgprRB;
+ const RegisterBank &DstBank = DstOnValu ? *VgprRB : *SgprRB;
+ Register Carry;
+ Register Zero;
+
+ if (!IsUnsigned) {
+ if (MulHiInVgpr) {
+ Zero = MRI.createVirtualRegister({VgprRB, S32});
+ Carry = MRI.createVirtualRegister({VccRB, S1});
+ } else {
+ Zero = MRI.createVirtualRegister({SgprRB, S32});
+ Carry = MRI.createVirtualRegister({SgprRB, S32});
+ }
+
+ B.buildConstant(Zero, 0);
+ B.buildICmp(CmpInst::ICMP_SLT, Carry, DstHi, Zero);
+
+ if (DstOnValu && !MulHiInVgpr) {
+ Carry = B.buildTrunc({VccRB, S1}, Carry).getReg(0);
+ }
+ }
+
+ if (Accumulate) {
+ if (DstOnValu) {
+ DstLo = B.buildCopy({VgprRB, S32}, DstLo).getReg(0);
+ DstHi = B.buildCopy({VgprRB, S32}, DstHi).getReg(0);
+ }
+
+ Register Src2Lo = MRI.createVirtualRegister({&DstBank, S32});
+ Register Src2Hi = MRI.createVirtualRegister({&DstBank, S32});
+ B.buildUnmerge({Src2Lo, Src2Hi}, Src2);
+
+ if (!IsUnsigned) {
+ Register Src2Sign = MRI.createVirtualRegister({&CarryBank, CarryType});
+ Register XorCarry = MRI.createVirtualRegister({&CarryBank, CarryType});
+ B.buildICmp(CmpInst::ICMP_SLT, Src2Sign, Src2Hi, Zero);
+ Carry = B.buildXor(XorCarry, Carry, Src2Sign).getReg(0);
+ }
+ Register AddLo = MRI.createVirtualRegister({&DstBank, S32});
+ Register CarryLo = MRI.createVirtualRegister({&CarryBank, CarryType});
+ DstLo = B.buildUAddo(AddLo, CarryLo, DstLo, Src2Lo).getReg(0);
+
+ Register AddHi = MRI.createVirtualRegister({&DstBank, S32});
+ Register CarryHi = MRI.createVirtualRegister({&CarryBank, CarryType});
+
+ DstHi = B.buildUAdde(AddHi, CarryHi, DstHi, Src2Hi, CarryLo).getReg(0);
+
+ if (IsUnsigned) {
+ Carry = CarryHi;
+ } else {
+ Register CarryXor = MRI.createVirtualRegister({&CarryBank, CarryType});
+ Carry = B.buildXor(CarryXor, Carry, CarryHi).getReg(0);
+ }
+ } else {
+ if (IsUnsigned) {
+ Register CarryZero = MRI.createVirtualRegister({&CarryBank, CarryType});
+ Carry = B.buildConstant(CarryZero, 0).getReg(0);
+ }
+ }
+
+ B.buildMergeLikeInstr(Dst0, {DstLo, DstHi});
+
+ if (DstOnValu) {
+ B.buildCopy(Dst1, Carry);
+ } else {
+ if (!MRI.use_empty(Dst1))
+ B.buildTrunc(Dst1, Carry);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
bool RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -857,6 +983,8 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI,
return lowerV_BFE(MI);
case S_BFE:
return lowerS_BFE(MI);
+ case UniMAD64:
+ return lowerUniMAD64(MI);
case SplitTo32:
return lowerSplitTo32(MI);
case SplitTo32Select:
@@ -933,6 +1061,7 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI,
LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
switch (ID) {
+ case SgprS1:
case Vcc:
case UniInVcc:
return LLT::scalar(1);
@@ -1057,6 +1186,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
switch (ID) {
case Vcc:
return VccRB;
+ case SgprS1:
case Sgpr16:
case Sgpr32:
case Sgpr32_WF:
@@ -1144,6 +1274,7 @@ bool RegBankLegalizeHelper::applyMappingDst(
switch (MethodIDs[OpIdx]) {
// vcc, sgpr and vgpr scalars, pointers and vectors
case Vcc:
+ case SgprS1:
case Sgpr16:
case Sgpr32:
case Sgpr64:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
index 1dc0278d6d90d..f92ed3de6cf27 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
@@ -125,6 +125,7 @@ class RegBankLegalizeHelper {
bool lowerUnpackBitShift(MachineInstr &MI);
bool lowerV_BFE(MachineInstr &MI);
bool lowerS_BFE(MachineInstr &MI);
+ bool lowerUniMAD64(MachineInstr &MI);
bool lowerSplitTo32(MachineInstr &MI);
bool lowerSplitTo16(MachineInstr &MI);
bool lowerSplitTo32Select(MachineInstr &MI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 6636af46ec879..991a85b670a76 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -501,6 +501,10 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasMulHi)
.Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasMulHi);
+ addRulesForGOpcs({G_AMDGPU_MAD_U64_U32, G_AMDGPU_MAD_I64_I32}, Standard)
+ .Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}})
+ .Uni(S64, {{Sgpr64, SgprS1}, {Sgpr32, Sgpr32, Sgpr64}, UniMAD64});
+
addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
.Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
.Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 1ac117304b76f..77ed0b7fe7920 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -134,6 +134,7 @@ enum RegBankLLTMappingApplyID {
Vcc,
// sgpr scalars, pointers, vectors and B-types
+ SgprS1,
Sgpr16,
Sgpr32,
Sgpr64,
@@ -224,6 +225,7 @@ enum LoweringMethodID {
S_BFE,
V_BFE,
VgprToVccCopy,
+ UniMAD64,
SplitTo32,
ScalarizeToS16,
SplitTo32Select,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir
index cce4beacafdb2..3d7b94613d15b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir
@@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX8 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX9MI %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck --check-prefixes=CHECK,GFX8 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck --check-prefixes=CHECK,GFX9MI %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck --check-prefixes=CHECK,GFX10 %s
---
name: mad_u64_u32_sss
@@ -10,8 +10,6 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
- ;
- ;
; GFX8-LABEL: name: mad_u64_u32_sss
; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
; GFX8-NEXT: {{ $}}
@@ -23,13 +21,13 @@ body: |
; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY4]], [[COPY5]]
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec
+ ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY4]], [[COPY5]]
+ ; GFX8-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UMULH]]
; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
- ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]]
+ ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[AMDGPU_READANYLANE]], [[UV1]], [[UADDO1]]
; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32)
+ ;
; GFX9MI-LABEL: name: mad_u64_u32_sss
; GFX9MI: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
; GFX9MI-NEXT: {{ $}}
@@ -44,7 +42,7 @@ body: |
; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]]
; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32)
+ ;
; GFX10-LABEL: name: mad_u64_u32_sss
; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
; GFX10-NEXT: {{ $}}
@@ -59,7 +57,6 @@ body: |
; GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]]
; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
@@ -75,55 +72,17 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- ;
- ;
- ; GFX8-LABEL: name: mad_u64_u32_ssv
- ; GFX8: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- ; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
- ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY4]], [[COPY5]]
- ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32)
- ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32)
- ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
- ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]]
- ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]]
- ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1)
- ; GFX9MI-LABEL: name: mad_u64_u32_ssv
- ; GFX9MI: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- ; GFX9MI-NEXT: {{ $}}
- ; GFX9MI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
- ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY5]], [[MV]]
- ; GFX10-LABEL: name: mad_u64_u32_ssv
- ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
- ; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
- ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
- ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
- ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32)
- ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32)
- ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
- ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]]
- ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]]
- ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1)
+ ; CHECK-LABEL: name: mad_u64_u32_ssv
+ ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY5]], [[MV]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $vgpr0
@@ -139,8 +98,6 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2
- ;
- ;
; CHECK-LABEL: name: mad_u64_u32_svs
; CHECK: liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2
; CHECK-NEXT: {{ $}}
@@ -167,8 +124,6 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
- ;
- ;
; CHECK-LABEL: name: mad_u64_u32_svv
; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
@@ -194,8 +149,6 @@ legalized: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2
- ;
- ;
; CHECK-LABEL: name: mad_u64_u32_vss
; CHECK: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2
; CHECK-NEXT: {{ $}}
@@ -222,8 +175,6 @@ legalized: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0, $vgpr1, $vgpr2
- ;
- ;
; CHECK-LABEL: name: mad_u64_u32_vsv
; CHECK: liveins: $vgpr0, $sgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
@@ -249,8 +200,6 @@ legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1
- ;
- ;
; CHECK-LABEL: name: mad_u64_u32_vvs
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1
; CHECK-NEXT: {{ $}}
@@ -276,8 +225,6 @@ legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ;
- ;
; CHECK-LABEL: name: mad_u64_u32_vvv
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-NEXT: {{ $}}
@@ -302,8 +249,6 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
- ;
- ;
; GFX8-LABEL: name: mad_i64_i32_sss
; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
; GFX8-NEXT: {{ $}}
@@ -315,18 +260,18 @@ body: |
; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
- ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY4]], [[COPY5]]
- ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec
+ ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY4]], [[COPY5]]
+ ; GFX8-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[SMULH]]
; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
- ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C]]
+ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[AMDGPU_READANYLANE]](s32), [[C]]
; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
; GFX8-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]]
; GFX8-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]]
; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
- ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]]
+ ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[AMDGPU_READANYLANE]], [[UV1]], [[UADDO1]]
; GFX8-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]]
; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32)
+ ;
; GFX9MI-LABEL: name: mad_i64_i32_sss
; GFX9MI: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
; GFX9MI-NEXT: {{ $}}
@@ -346,7 +291,7 @@ body: |
; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]]
; GFX9MI-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]]
; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
- ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32)
+ ;
; GFX10...
[truncated]
``````````
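As an aside, the signed-case carry math in lowerUniMAD64 above can be sanity-checked outside the compiler. The sketch below is illustrative only and not part of the patch (the helper names are made up, and it relies on the Clang/GCC `__int128` extension): it compares bit 64 of the exact 65-bit result against the sign(product) ^ sign(Src2) ^ carry formula that the lowering implements with G_ICMP, G_UADDO/G_UADDE, and G_XOR.

```cpp
// Illustrative sketch (not part of the patch): checks the signed carry-out
// identity used by lowerUniMAD64, i.e. bit 64 of the 65-bit result
//   sext(Src0) * sext(Src1) + Src2
// equals sign(product) ^ sign(Src2) ^ (carry-out of the unsigned 64-bit add).
#include <cassert>
#include <cstdint>

static bool carryRef(int32_t A, int32_t B, int64_t C) {
  // Exact 65-bit result, sign-extended into 128 bits; read bit 64 directly.
  __int128 Full = (__int128)((int64_t)A * (int64_t)B) + (__int128)C;
  return ((unsigned __int128)Full >> 64) & 1;
}

static bool carryLowered(int32_t A, int32_t B, int64_t C) {
  int64_t P = (int64_t)A * (int64_t)B;      // G_MUL low + G_SMULH high halves
  uint64_t Sum = (uint64_t)P + (uint64_t)C; // G_UADDO / G_UADDE pair
  bool Carry64 = Sum < (uint64_t)P;         // carry-out of the unsigned add
  bool SignP = P < 0;                       // G_ICMP slt DstHi, 0
  bool SignC = C < 0;                       // G_ICMP slt Src2Hi, 0
  return SignP ^ SignC ^ Carry64;           // the two G_XORs in the lowering
}

int main() {
  const int32_t Xs[] = {0, 1, -1, 12345, INT32_MIN, INT32_MAX};
  const int64_t Cs[] = {0, 1, -1, -987654321, INT64_MIN, INT64_MAX};
  for (int32_t A : Xs)
    for (int32_t B : Xs)
      for (int64_t C : Cs)
        assert(carryRef(A, B, C) == carryLowered(A, B, C));
}
```

Since the 65-bit sign bit is a mod-2 sum of the two operand sign bits and the carry into bit 64, the lowering only needs two compares and two XORs rather than a wider addition.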
https://github.com/llvm/llvm-project/pull/175887
More information about the llvm-branch-commits mailing list