[llvm] GlobalISel: Use G_UADDE when narrowing G_UMULH (PR #97194)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 30 00:35:07 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
This greatly shrinks the AMDGPU div64 expansion.
Instead of adding a zext of the condition output, add a zero and use
the carry-in to G_UADDE. This is closer to how the DAG expansion using
umulh does it, and it seems more natural to leave the boolean output
as a boolean input. We should have a combine to form G_UADDE from this
pattern, but the legalizer shouldn't create extra work for the
combiner if it can help it.
The Mips cases are regressions, but the DAG lowering for muli128 seems
to not use the expansion involving MULHU/MULHS at all. The DAG output
is radically different from GlobalISel's as-is, so it seems like Mips
should be using a different legalization strategy here to begin with.
The RISCV legalizer tests look worse for the mul i96 case, but those
tests didn't exist when I wrote this patch 4 years ago (and then forgot
about it), so I haven't really looked into why. We've entered the age where
most tests should just be using IR, so I don't know if this matters or not
(the IR mul test doesn't seem to cover i96).
---
Patch is 1.76 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97194.diff
18 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+10-7)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir (+9-10)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir (+1047-1260)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir (+935-1148)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir (+996-1236)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir (+60-86)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulo.mir (+86-112)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir (+884-1124)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll (+1358-1630)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll (+768-887)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll (+1605-1961)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll (+785-987)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll (+647-762)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll (+707-959)
- (modified) llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir (+100-44)
- (modified) llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll (+50-21)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv32.mir (+25-13)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-mul-ext-rv64.mir (+31-13)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 975f19b8596b9..fbe6e806734bd 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5716,6 +5716,7 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
ArrayRef<Register> Src1Regs,
ArrayRef<Register> Src2Regs,
LLT NarrowTy) {
+ const LLT S1 = LLT::scalar(1);
MachineIRBuilder &B = MIRBuilder;
unsigned SrcParts = Src1Regs.size();
unsigned DstParts = DstRegs.size();
@@ -5728,6 +5729,8 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
unsigned CarrySumPrevDstIdx;
SmallVector<Register, 4> Factors;
+ const Register Zero = B.buildConstant(NarrowTy, 0).getReg(0);
+
for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
// Collect low parts of muls for DstIdx.
for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
@@ -5752,15 +5755,15 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
// Add all factors and accumulate all carries into CarrySum.
if (DstIdx != DstParts - 1) {
MachineInstrBuilder Uaddo =
- B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
+ B.buildUAddo(NarrowTy, S1, Factors[0], Factors[1]);
FactorSum = Uaddo.getReg(0);
- CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
+ CarrySum = Zero;
for (unsigned i = 2; i < Factors.size(); ++i) {
- MachineInstrBuilder Uaddo =
- B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
- FactorSum = Uaddo.getReg(0);
- MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
- CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
+ auto Uadde =
+ B.buildUAdde(NarrowTy, S1, FactorSum, Factors[i], Uaddo.getReg(1));
+ FactorSum = Uadde.getReg(0);
+ CarrySum = B.buildUAdde(NarrowTy, S1, CarrySum, Zero, Uadde.getReg(1))
+ .getReg(0);
}
} else {
// Since value for the next index is not calculated, neither is CarrySum.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
index 2bf8649e76242..a79f1db9b8cb2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
@@ -619,25 +619,24 @@ body: |
; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96)
; GFX6-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96)
; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]]
; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]]
; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]]
; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]]
- ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1)
- ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]]
- ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1)
- ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+ ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UADDO]], [[UMULH]], [[UADDO1]]
+ ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[C]], [[C]], [[UADDE1]]
; GFX6-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]]
; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]]
; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]]
; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]]
- ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]]
- ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]]
- ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]]
- ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]]
- ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]]
- ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32)
+ ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]]
+ ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[MUL5]]
+ ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[UMULH1]]
+ ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH2]]
+ ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UADDE2]]
+ ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDE]](s32), [[ADD4]](s32)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
;
; GFX89-LABEL: name: test_mul_s96
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
index f9ec3bca78931..81e13b6cf6745 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
@@ -49,6 +49,7 @@ body: |
; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
; GFX6-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+ ;
; GFX8-LABEL: name: test_sdiv_s32
; GFX8: liveins: $vgpr0, $vgpr1
; GFX8-NEXT: {{ $}}
@@ -87,6 +88,7 @@ body: |
; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
; GFX8-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+ ;
; GFX9-LABEL: name: test_sdiv_s32
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
@@ -125,6 +127,7 @@ body: |
; GFX9-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
; GFX9-NEXT: $vgpr0 = COPY [[SUB3]](s32)
+ ;
; GFX10-LABEL: name: test_sdiv_s32
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10-NEXT: {{ $}}
@@ -244,6 +247,7 @@ body: |
; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX8-LABEL: name: test_sdiv_v2s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8-NEXT: {{ $}}
@@ -313,6 +317,7 @@ body: |
; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32)
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX9-LABEL: name: test_sdiv_v2s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -382,6 +387,7 @@ body: |
; GFX9-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]]
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX10-LABEL: name: test_sdiv_v2s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10-NEXT: {{ $}}
@@ -506,6 +512,7 @@ body: |
; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]]
; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]]
; GFX6-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]]
+ ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX6-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]]
; GFX6-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]]
; GFX6-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]]
@@ -515,89 +522,73 @@ body: |
; GFX6-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]]
; GFX6-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]]
- ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1)
- ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]]
- ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1)
- ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]]
+ ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UADDO4]], [[UMULH1]], [[UADDO5]]
+ ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE5]]
; GFX6-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]]
; GFX6-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]]
; GFX6-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]]
- ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
- ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1)
- ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]]
- ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1)
- ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]]
- ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]]
- ; GFX6-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1)
- ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]]
+ ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]]
+ ; GFX6-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDO6]], [[UMULH3]], [[UADDO7]]
+ ; GFX6-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE9]]
+ ; GFX6-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE8]], [[UADDE6]], [[UADDO7]]
+ ; GFX6-NEXT: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[C6]], [[UADDE13]]
; GFX6-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]]
- ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]]
- ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]]
- ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]]
- ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]]
- ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]]
- ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]]
- ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]]
- ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
- ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH5]]
- ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]]
- ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD7]]
- ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]]
- ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
- ; GFX6-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1)
- ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]]
- ; GFX6-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1)
- ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]]
- ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD7]]
- ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]]
- ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD7]]
- ; GFX6-NEXT: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
- ; GFX6-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1)
- ; GFX6-NEXT: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]]
- ; GFX6-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1)
- ; GFX6-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]]
- ; GFX6-NEXT: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD8]]
- ; GFX6-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1)
- ; GFX6-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[ZEXT9]]
- ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD7]]
- ; GFX6-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD10]]
- ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX6-NEXT: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]]
- ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UADDE4]], [[ADD11]], [[UADDO27]]
+ ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[UADDE14]]
+ ; GFX6-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDE12]]
+ ; GFX6-NEXT: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD2]], [[UADDO9]]
+ ; GFX6-NEXT: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO8]]
+ ; GFX6-NEXT: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO8]]
+ ; GFX6-NEXT: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE16]]
+ ; GFX6-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO8]]
+ ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]]
+ ; GFX6-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH5]]
+ ; GFX6-NEXT: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[MUL6]]
+ ; GFX6-NEXT: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO8]], [[ADD4]]
+ ; GFX6-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[MUL6]]
+ ; GFX6-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]]
+ ; GFX6-NEXT: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDO10]], [[UMULH6]], [[UADDO11]]
+ ; GFX6-NEXT: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE19]]
+ ; GFX6-NEXT: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE16]], [[ADD4]]
+ ; GFX6-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[MUL6]]
+ ; GFX6-NEXT: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO8]], [[ADD4]]
+ ; GFX6-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]]
+ ; GFX6-NEXT: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDO12]], [[UMULH8]], [[UADDO13]]
+ ; GFX6-NEXT: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE23]]
+ ; GFX6-NEXT: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE22]], [[UADDE20]], [[UADDO13]]
+ ; GFX6-NEXT: [[UADDE28:%[0-9]+]]:_(s32), [[UADDE29:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[C6]], [[UADDE27]]
+ ; GFX6-NEXT: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE16]], [[ADD4]]
+ ; GFX6-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[UADDE28]]
+ ; GFX6-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UADDE26]]
+ ; GFX6-NEXT: [[UADDE30:%[0-9]+]]:_(s32), [[UADDE31:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[ADD5]], [[UADDO15]]
; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64)
- ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]]
- ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE6]]
- ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]]
- ; GFX6-NEXT: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
- ; GFX6-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1)
- ; GFX6-NEXT: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]]
- ; GFX6-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1)
- ; GFX6-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]]
- ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE6]]
- ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]]
- ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE6]]
- ; GFX6-NEXT: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
- ; GFX6-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1)
- ; GFX6-NEXT: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]]
- ; GFX6-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1)
- ; GFX6-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]]
- ; GFX6-NEXT: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD12]]
- ; GFX6-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1)
- ; GFX6-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[ZEXT14]]
- ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE6]]
- ; GFX6-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD14]]
- ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD15]](s32)
+ ; GFX6-NEXT: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO14]]
+ ; GFX6-NEXT: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE30]]
+ ; GFX6-NEXT: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO14]]
+ ; GFX6-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]]
+ ; GFX6-NEXT: [[UADDE32:%[0-9]+]]:_(s32), [[UADDE33:%[0-9]+]]:_(s1) = G_UADDE [[UADDO16]], [[UMULH10]], [[UADDO17]]
+ ; GFX6-NEXT: [[UADDE34:%[0-9]+]]:_(s32), [[UADDE35:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE33]]
+ ; GFX6-NEXT: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE30]]
+ ; GFX6-NEXT: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO14]]
+ ; GFX6-NEXT: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE30]]
+ ; GFX6-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]]
+ ; GFX6-NEXT: [[UADDE36:%[0-9]+]]:_(s32), [[UADDE37:%[0-9]+]]:_(s1) = G_UADDE [[UADDO18]], [[UMULH12]], [[UADDO19]]
+ ; GFX6-NEXT: [[UADDE38:%[0-9]+]]:_(s32), [[UADDE39:%[0-9]+]]:_(s1) = G_UADDE [[C6]], [[C6]], [[UADDE37]]
+ ; GFX6-NEXT: [[UADDE40:%[0-9]+]]:_(s32), [[UADDE41:%[0-9]+]]:_(s1) = G_UADDE [[UADDE36]], [[UADDE34]], [[UADDO19]]
+ ; GFX6-NEXT: [[UADDE42:%[0-9]+]]:_(s32), [[UADDE43:%[0-9]+]]:_(s1) = G_UADDE [[UADDE38]], [[C6]], [[UADDE41]]
+ ; GFX6-NEXT: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE30]]
+ ; GFX6-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[UADDE42]]
+ ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDE40]](s32), [[ADD6]](s32)
; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64)
- ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]]
- ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]]
- ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD15]]
- ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]]
- ; GFX6-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
- ; GFX6-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH14]]
+ ; GFX6-NEXT: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDE40]]
+ ; GFX6-NEXT: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDE40]]
+ ; GFX6-NEXT: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD6]]
+ ; GFX6-NEXT: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDE40]]
+ ; GFX6-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]]
+ ; GFX6-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH14]]
; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]]
- ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD17]], [[USUBO3]]
- ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD17]]
+ ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32),...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/97194
More information about the llvm-commits
mailing list