[llvm] [WIP][TableGen][GISel] Learn to import patterns with optional/physreg defs (PR #120343)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 17 17:27:57 PST 2024
https://github.com/s-barannikov created https://github.com/llvm/llvm-project/pull/120343
None
>From 3bdc2f28d485445657271ba6587203c0412c9b58 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <s.barannikov at module.ru>
Date: Wed, 18 Dec 2024 02:05:14 +0300
Subject: [PATCH 1/2] [TableGen][GISel] Import more "multi-level" patterns
Previously, if the destination DAG has an untyped leaf, we would import
the pattern only if that leaf is defined by the *top-level* source DAG.
This is an unnecessary restriction.
Here is an example of such a pattern:
```
def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC),
(VMLADDUHM $vA, $vB, $vC)>;
```
Previously, it failed to import because `add` defines neither
`$vA` nor `$vB`.
This change reduces the number of skipped patterns as follows:
```
AArch64: 8695 -> 8548
AMDGPU: 11333 -> 11240
ARM: 4297 -> 4278
PowerPC: 3955 -> 3010
```
Other GISel-enabled targets are unaffected.
---
.../GlobalISel/inst-select-ashr.s16.mir | 14 +-
.../AMDGPU/GlobalISel/inst-select-ctpop.mir | 25 +-
.../GlobalISel/inst-select-lshr.s16.mir | 14 +-
.../AMDGPU/GlobalISel/inst-select-shl.s16.mir | 14 +-
llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll | 170 +-
llvm/test/CodeGen/AMDGPU/constrained-shift.ll | 4 -
.../CodeGen/AMDGPU/integer-mad-patterns.ll | 1612 +++++++----------
llvm/utils/TableGen/GlobalISelEmitter.cpp | 11 +-
8 files changed, 821 insertions(+), 1043 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir
index fb7c2d4d705e75..95d2bae98df2e1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir
@@ -274,24 +274,18 @@ body: |
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
- ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ASHRREV_I16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX9-LABEL: name: ashr_s16_vv_zext_to_s64
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
- ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ASHRREV_I16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-LABEL: name: ashr_s16_vv_zext_to_s64
; GFX10: liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir
index 779312596313a3..3a2ed71e4d2242 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir
@@ -79,9 +79,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY1]], 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = G_CTPOP %0
@@ -104,9 +103,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY1]], [[V_BCNT_U32_B32_e64_]], 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = G_CTPOP %0
@@ -155,9 +153,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY1]], 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s32) = G_CTPOP %0
@@ -181,9 +178,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY]], 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s32) = G_CTPOP %1
@@ -207,9 +203,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def dead $scc
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[S_BCNT1_I32_B32_]], [[COPY1]], 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_CTPOP %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir
index e7ec5fcbba2473..a96b574a647848 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir
@@ -272,24 +272,18 @@ body: |
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
- ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHRREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX9-LABEL: name: lshr_s16_vv_zext_to_s64
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
- ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHRREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-LABEL: name: lshr_s16_vv_zext_to_s64
; GFX10: liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir
index bcb6d75c18302b..b0703a642e033a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir
@@ -272,24 +272,18 @@ body: |
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
- ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX9-LABEL: name: shl_s16_vv_zext_to_s64
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
- ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-LABEL: name: shl_s16_vv_zext_to_s64
; GFX10: liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
index 6bb4e2d3dbe26e..ed85fb19d90517 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -204,18 +204,37 @@ define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b)
}
define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) {
-; GCN-LABEL: vector_xnor_i32_one_use:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
-; GCN-NEXT: v_not_b32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX7-LABEL: vector_xnor_i32_one_use:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: vector_xnor_i32_one_use:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: vector_xnor_i32_one_use:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: vector_xnor_i32_one_use:
+; GFX906: ; %bb.0: ; %entry
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
+; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xnor_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1
-; GFX10-NEXT: v_not_b32_e32 v0, v0
+; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%xor = xor i32 %a, %b
@@ -224,22 +243,45 @@ entry:
}
define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) {
-; GCN-LABEL: vector_xnor_i64_one_use:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v0, v0, v2
-; GCN-NEXT: v_xor_b32_e32 v1, v1, v3
-; GCN-NEXT: v_not_b32_e32 v0, v0
-; GCN-NEXT: v_not_b32_e32 v1, v1
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX7-LABEL: vector_xnor_i64_one_use:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_xor_b32_e32 v0, v0, v2
+; GFX7-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: v_not_b32_e32 v1, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: vector_xnor_i64_one_use:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_xor_b32_e32 v0, v0, v2
+; GFX8-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: v_not_b32_e32 v1, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: vector_xnor_i64_one_use:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_xor_b32_e32 v0, v0, v2
+; GFX900-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: v_not_b32_e32 v1, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: vector_xnor_i64_one_use:
+; GFX906: ; %bb.0: ; %entry
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v2
+; GFX906-NEXT: v_xnor_b32_e32 v1, v1, v3
+; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xnor_i64_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2
-; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX10-NEXT: v_not_b32_e32 v0, v0
-; GFX10-NEXT: v_not_b32_e32 v1, v1
+; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v2
+; GFX10-NEXT: v_xnor_b32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%xor = xor i64 %a, %b
@@ -248,16 +290,32 @@ entry:
}
define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
-; GCN-LABEL: xnor_s_v_i32_one_use:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
-; GCN-NEXT: v_not_b32_e32 v0, v0
-; GCN-NEXT: ; return to shader part epilog
+; GFX7-LABEL: xnor_s_v_i32_one_use:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: xnor_s_v_i32_one_use:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX900-LABEL: xnor_s_v_i32_one_use:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: ; return to shader part epilog
+;
+; GFX906-LABEL: xnor_s_v_i32_one_use:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
+; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_s_v_i32_one_use:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT: v_not_b32_e32 v0, v0
+; GFX10-NEXT: v_xnor_b32_e32 v0, s0, v0
; GFX10-NEXT: ; return to shader part epilog
%xor = xor i32 %s, %v
%d = xor i32 %xor, -1
@@ -266,16 +324,32 @@ define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
}
define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) {
-; GCN-LABEL: xnor_v_s_i32_one_use:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
-; GCN-NEXT: v_not_b32_e32 v0, v0
-; GCN-NEXT: ; return to shader part epilog
+; GFX7-LABEL: xnor_v_s_i32_one_use:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: xnor_v_s_i32_one_use:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX900-LABEL: xnor_v_s_i32_one_use:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: ; return to shader part epilog
+;
+; GFX906-LABEL: xnor_v_s_i32_one_use:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
+; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_v_s_i32_one_use:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT: v_not_b32_e32 v0, v0
+; GFX10-NEXT: v_xnor_b32_e64 v0, v0, s0
; GFX10-NEXT: ; return to shader part epilog
%xor = xor i32 %v, %s
%d = xor i32 %xor, -1
@@ -314,19 +388,15 @@ define amdgpu_ps <2 x float> @xnor_i64_s_v_one_use(i64 inreg %a, i64 %b64) {
; GFX906-LABEL: xnor_i64_s_v_one_use:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX906-NEXT: v_not_b32_e32 v0, v0
-; GFX906-NEXT: v_not_b32_e32 v1, v1
+; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
+; GFX906-NEXT: v_xnor_b32_e32 v1, s1, v1
; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_i64_s_v_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX10-NEXT: v_not_b32_e32 v0, v0
-; GFX10-NEXT: v_not_b32_e32 v1, v1
+; GFX10-NEXT: v_xnor_b32_e32 v0, s0, v0
+; GFX10-NEXT: v_xnor_b32_e32 v1, s1, v1
; GFX10-NEXT: ; return to shader part epilog
entry:
%b = shl i64 %b64, 29
@@ -367,19 +437,15 @@ define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) {
; GFX906-LABEL: xnor_i64_v_s_one_use:
; GFX906: ; %bb.0:
; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX906-NEXT: v_not_b32_e32 v0, v0
-; GFX906-NEXT: v_not_b32_e32 v1, v1
+; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
+; GFX906-NEXT: v_xnor_b32_e64 v1, v1, s1
; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_i64_v_s_one_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX10-NEXT: v_not_b32_e32 v0, v0
-; GFX10-NEXT: v_not_b32_e32 v1, v1
+; GFX10-NEXT: v_xnor_b32_e64 v0, v0, s0
+; GFX10-NEXT: v_xnor_b32_e64 v1, v1, s1
; GFX10-NEXT: ; return to shader part epilog
%b = shl i64 %b64, 29
%xor = xor i64 %b, %a
@@ -419,7 +485,7 @@ define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) {
; GFX10-LABEL: vector_xor_na_b_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_xor3_b32 v0, v0, -1, v1
+; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%na = xor i32 %a, -1
@@ -458,7 +524,7 @@ define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) {
; GFX10-LABEL: vector_xor_a_nb_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_xor3_b32 v0, v1, -1, v0
+; GFX10-NEXT: v_xnor_b32_e32 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%nb = xor i32 %b, -1
diff --git a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
index 1b35a89ad7f935..4011c21af69046 100644
--- a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
+++ b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
@@ -139,10 +139,6 @@ define <4 x i32> @csh_v4i32(<4 x i32> %a, <4 x i32> %b) {
; GISEL-LABEL: csh_v4i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: v_and_b32_e32 v4, 31, v4
-; GISEL-NEXT: v_and_b32_e32 v5, 31, v5
-; GISEL-NEXT: v_and_b32_e32 v6, 31, v6
-; GISEL-NEXT: v_and_b32_e32 v7, 31, v7
; GISEL-NEXT: v_lshlrev_b32_e32 v8, v4, v0
; GISEL-NEXT: v_lshlrev_b32_e32 v9, v5, v1
; GISEL-NEXT: v_lshlrev_b32_e32 v10, v6, v2
diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
index 9f093cc7b5abf2..26a4ea9d8a4b6e 100644
--- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
@@ -230,49 +230,27 @@ entry:
}
define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
-; GFX67-SDAG-LABEL: clpeak_imad_pat_i16:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v0, v2
-; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_imad_pat_i16:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_imad_pat_i16:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v3, v0, v2
+; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_i16:
; GFX8-SDAG: ; %bb.0: ; %entry
@@ -337,11 +315,11 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -363,13 +341,13 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
@@ -400,13 +378,13 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
@@ -470,42 +448,40 @@ define <2 x i16> @clpeak_imad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v5, v3, 1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v4, v2, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
-; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2
+; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16:
@@ -682,46 +658,43 @@ define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v6, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v7, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v6, v3, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v7, v4, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v8, v5, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v0, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v6, v6, v3, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v1, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v7, v7, v4, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v3, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v9
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v6
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v2, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v5, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v4
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v10
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v7
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v11
+; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v8
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v8
-; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v3, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v8
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v3, v2
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_v3i16:
@@ -1063,19 +1036,15 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v10, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v10, v5, v1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v11, 16, v9
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9
; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v9, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v11, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v8, v4, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v11, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v9, v6, v2
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v11, v7, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
@@ -1085,60 +1054,60 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX67-GISEL-NEXT: v_or_b32_e32 v1, v1, v2
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v10
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v13, v2, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v10, v10, v5, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v12, v0, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v10
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v8
-; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v9
-; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v11
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
-; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
-; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v6
-; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v7
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
-; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v2
-; GFX67-GISEL-NEXT: v_or_b32_e32 v5, v5, v6
-; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7
-; GFX67-GISEL-NEXT: v_or_b32_e32 v6, v6, v7
-; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v8
-; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v9
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
-; GFX67-GISEL-NEXT: v_or_b32_e32 v7, v7, v8
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v15, v3, v7
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v11, v11, v7, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v3, v7, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v14, v1, v6
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v9, v9, v6, 1
+; GFX67-GISEL-NEXT: v_or_b32_e32 v8, v8, v10
+; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v11
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v6, 1
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v12
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v8
+; GFX67-GISEL-NEXT: v_or_b32_e32 v9, v9, v10
+; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v1, v2
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v8
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v13
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v5, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v14
+; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v9
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v9
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v15
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v6, v3
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v4, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v5
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v9, 16, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v8
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v1, v9
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v6
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v7
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v2, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v7
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v5
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v4, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_v4i16:
@@ -1403,47 +1372,26 @@ entry:
}
define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
-; GFX67-SDAG-LABEL: clpeak_umad_pat_i16:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v0, v1
-; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v3, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v3, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_umad_pat_i16:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_umad_pat_i16:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_mul_u32_u24_e32 v2, v0, v1
+; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v3, v2
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_mad_u32_u24 v1, v3, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_umad_pat_i16:
; GFX8-SDAG: ; %bb.0: ; %entry
@@ -1504,11 +1452,11 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1530,13 +1478,13 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -1567,13 +1515,13 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -1637,42 +1585,40 @@ define <2 x i16> @clpeak_umad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v5, v3, 1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v4, v2, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
-; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2
+; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16:
@@ -1849,46 +1795,43 @@ define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v6, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v7, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v6, v3, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v7, v4, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v8, v5, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v0, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v6, v6, v3, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v1, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v7, v7, v4, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v3, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v9
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v6
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v2, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v5, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v4
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v10
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v7
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v11
+; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v8
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v8
-; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v3, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v8
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v3, v2
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_umad_pat_v3i16:
@@ -2230,19 +2173,15 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v10, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v10, v5, v1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v11, 16, v9
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9
; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v9, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v11, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v8, v4, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v11, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v9, v6, v2
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v11, v7, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
@@ -2252,60 +2191,60 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX67-GISEL-NEXT: v_or_b32_e32 v1, v1, v2
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v10
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v13, v2, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v10, v10, v5, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v12, v0, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v10
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v8
-; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v9
-; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v11
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
-; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
-; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v6
-; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v7
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
-; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v2
-; GFX67-GISEL-NEXT: v_or_b32_e32 v5, v5, v6
-; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7
-; GFX67-GISEL-NEXT: v_or_b32_e32 v6, v6, v7
-; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v8
-; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v9
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
-; GFX67-GISEL-NEXT: v_or_b32_e32 v7, v7, v8
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v15, v3, v7
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v11, v11, v7, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v3, v7, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v14, v1, v6
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v9, v9, v6, 1
+; GFX67-GISEL-NEXT: v_or_b32_e32 v8, v8, v10
+; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v11
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v6, 1
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v12
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v8
+; GFX67-GISEL-NEXT: v_or_b32_e32 v9, v9, v10
+; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v1, v2
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v8
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v13
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v5, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v14
+; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v9
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v9
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v15
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v6, v3
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v4, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v5
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v9, 16, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v8
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v1, v9
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v6
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v7
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v2, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v7
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v5
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v4, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_umad_pat_v4i16:
@@ -4282,49 +4221,27 @@ entry:
}
define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
-; GFX67-SDAG-LABEL: clpeak_imad_pat_i8:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v0, v2
-; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_imad_pat_i8:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_imad_pat_i8:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v3, v0, v2
+; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
+; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xff, v3
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_i8:
; GFX8-SDAG: ; %bb.0: ; %entry
@@ -4389,11 +4306,11 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4415,13 +4332,13 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
@@ -4452,13 +4369,13 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
@@ -4524,32 +4441,30 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v4, v2, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v5, v3, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v7
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v5
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v2, v1
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i8:
@@ -4655,20 +4570,18 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v4, v0, v2
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v5, v1, v3
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v4, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v5, v1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v4, 1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v3, v5, 1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v4, v0, 1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v5, v1, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0
+; GFX10-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3
+; GFX10-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v4
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v5
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i8:
@@ -4704,25 +4617,21 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v4, v0, v2
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v5, v1, v3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v4, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v5, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v4, 1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v3, v5, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0
+; GFX11-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v4, v0, 1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v5, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3
+; GFX11-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v4
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v5
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-SDAG-LABEL: clpeak_imad_pat_v2i8:
@@ -4766,25 +4675,21 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v4, v0, v2
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v5, v1, v3
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v4, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v5, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v4, 1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v3, v5, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0
+; GFX1200-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v4, v0, 1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v5, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3
+; GFX1200-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v4
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v5
; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
%y18 = add <2 x i8> %x, <i8 1, i8 1>
@@ -7600,81 +7505,43 @@ entry:
}
define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
-; GFX67-SDAG-LABEL: clpeak_imad_pat_i16_x2:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v0, v2
-; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_imad_pat_i16_x2:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_imad_pat_i16_x2:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v3, v0, v2
+; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v2
+; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v2
+; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_i16_x2:
; GFX8-SDAG: ; %bb.0: ; %entry
@@ -7767,19 +7634,19 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -7807,23 +7674,23 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
@@ -7860,23 +7727,23 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
@@ -7902,79 +7769,42 @@ entry:
}
define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
-; GFX67-SDAG-LABEL: clpeak_umad_pat_i16_x2:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v0, v1
-; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v3, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v3, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v2, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_umad_pat_i16_x2:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_umad_pat_i16_x2:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_mul_u32_u24_e32 v2, v0, v1
+; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v3, v2
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v1
+; GFX67-NEXT: v_mad_u32_u24 v2, v3, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v2, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v1
+; GFX67-NEXT: v_mad_u32_u24 v2, v2, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v2, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_mad_u32_u24 v1, v2, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_umad_pat_i16_x2:
; GFX8-SDAG: ; %bb.0: ; %entry
@@ -8063,19 +7893,19 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -8103,23 +7933,23 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -8156,23 +7986,23 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -8268,10 +8098,8 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
@@ -8279,9 +8107,9 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v5, v3, 1
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v4, v2, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
@@ -8290,64 +8118,60 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v1, v5, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v0, v4, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v5, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
-; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v3, v5, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v2, v4, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v3, v5, 1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v2, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
+; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2
+; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
@@ -8591,10 +8415,8 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
@@ -8602,9 +8424,9 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v5, v3, 1
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v4, v2, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
@@ -8613,64 +8435,60 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v1, v5, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v0, v4, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v5, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
-; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v3, v5, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v2, v4, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v3, v5, 1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v2, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
+; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2
+; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
@@ -8908,24 +8726,14 @@ entry:
}
define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
-; GFX67-SDAG-LABEL: multi_use_mul_mad_i16_var:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v1, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v4, v1, v3
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: multi_use_mul_mad_i16_var:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v0, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: multi_use_mul_mad_i16_var:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_mad_u32_u24 v0, v4, v1, v2
+; GFX67-NEXT: v_mad_u32_u24 v1, v4, v1, v3
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: multi_use_mul_mad_i16_var:
; GFX8-SDAG: ; %bb.0: ; %entry
@@ -8973,10 +8781,9 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
; GFX10-GISEL-LABEL: multi_use_mul_mad_i16_var:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v0, v2
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, v3
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
+; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -8992,12 +8799,10 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
; GFX11-GISEL-LABEL: multi_use_mul_mad_i16_var:
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v0, v2
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, v3
-; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -9021,12 +8826,10 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v0, v2
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, v3
-; GFX1200-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX1200-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -9108,29 +8911,17 @@ entry:
}
define i16 @other_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z, ptr addrspace(3) %ptr) {
-; GFX67-SDAG-LABEL: other_use_mul_mad_i16_var:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, v2
-; GFX67-SDAG-NEXT: s_mov_b32 m0, -1
-; GFX67-SDAG-NEXT: ds_write_b16 v3, v4
-; GFX67-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: other_use_mul_mad_i16_var:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v0, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v2
-; GFX67-GISEL-NEXT: s_mov_b32 m0, -1
-; GFX67-GISEL-NEXT: ds_write_b16 v3, v1
-; GFX67-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: other_use_mul_mad_i16_var:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v1
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, v2
+; GFX67-NEXT: s_mov_b32 m0, -1
+; GFX67-NEXT: ds_write_b16 v3, v4
+; GFX67-NEXT: s_waitcnt lgkmcnt(0)
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: other_use_mul_mad_i16_var:
; GFX8: ; %bb.0: ; %entry
@@ -9151,69 +8942,36 @@ define i16 @other_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z, ptr addrspace(3) %
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: other_use_mul_mad_i16_var:
-; GFX10-SDAG: ; %bb.0: ; %entry
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_mul_lo_u16 v4, v0, v1
-; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2
-; GFX10-SDAG-NEXT: ds_write_b16 v3, v4
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: other_use_mul_mad_i16_var:
-; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v1, v2
-; GFX10-GISEL-NEXT: ds_write_b16 v3, v1
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: other_use_mul_mad_i16_var:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_mul_lo_u16 v4, v0, v1
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2
-; GFX11-SDAG-NEXT: ds_store_b16 v3, v4
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: other_use_mul_mad_i16_var:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v0, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v1, v2
-; GFX11-GISEL-NEXT: ds_store_b16 v3, v1
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: other_use_mul_mad_i16_var:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_mul_lo_u16 v4, v0, v1
+; GFX10-NEXT: v_mad_u16 v0, v0, v1, v2
+; GFX10-NEXT: ds_write_b16 v3, v4
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1200-SDAG-LABEL: other_use_mul_mad_i16_var:
-; GFX1200-SDAG: ; %bb.0: ; %entry
-; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX1200-SDAG-NEXT: v_mul_lo_u16 v4, v0, v1
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2
-; GFX1200-SDAG-NEXT: ds_store_b16 v3, v4
-; GFX1200-SDAG-NEXT: s_wait_dscnt 0x0
-; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: other_use_mul_mad_i16_var:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_lo_u16 v4, v0, v1
+; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2
+; GFX11-NEXT: ds_store_b16 v3, v4
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1200-GISEL-LABEL: other_use_mul_mad_i16_var:
-; GFX1200-GISEL: ; %bb.0: ; %entry
-; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v0, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v1, v2
-; GFX1200-GISEL-NEXT: ds_store_b16 v3, v1
-; GFX1200-GISEL-NEXT: s_wait_dscnt 0x0
-; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1200-LABEL: other_use_mul_mad_i16_var:
+; GFX1200: ; %bb.0: ; %entry
+; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-NEXT: s_wait_expcnt 0x0
+; GFX1200-NEXT: s_wait_samplecnt 0x0
+; GFX1200-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-NEXT: s_wait_kmcnt 0x0
+; GFX1200-NEXT: v_mul_lo_u16 v4, v0, v1
+; GFX1200-NEXT: v_mad_u16 v0, v0, v1, v2
+; GFX1200-NEXT: ds_store_b16 v3, v4
+; GFX1200-NEXT: s_wait_dscnt 0x0
+; GFX1200-NEXT: s_setpc_b64 s[30:31]
entry:
%mul = mul i16 %x, %y
%add0 = add i16 %mul, %z
@@ -9246,16 +9004,14 @@ define <4 x i16> @multi_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i
; GFX67-GISEL-LABEL: multi_use_mul_mad_v2i16_var:
; GFX67-GISEL: ; %bb.0: ; %entry
; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v0, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v0, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v3, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
+; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v8, v2, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v9, v3, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v8, v2, v6
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v9, v3, v7
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: multi_use_mul_mad_v2i16_var:
@@ -9366,20 +9122,20 @@ define <2 x i16> @other_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i
; GFX67-GISEL-LABEL: other_use_mul_mad_v2i16_var:
; GFX67-GISEL: ; %bb.0: ; %entry
; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v1, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v0, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
+; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
+; GFX67-GISEL-NEXT: v_or_b32_e32 v7, v7, v8
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, v5
; GFX67-GISEL-NEXT: s_mov_b32 m0, -1
-; GFX67-GISEL-NEXT: ds_write_b32 v6, v2
+; GFX67-GISEL-NEXT: ds_write_b32 v6, v7
; GFX67-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -9532,29 +9288,15 @@ define i64 @mul_u24_add64(i32 %x, i32 %y, i64 %z) {
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1200-SDAG-LABEL: mul_u24_add64:
-; GFX1200-SDAG: ; %bb.0:
-; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3]
-; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-GISEL-LABEL: mul_u24_add64:
-; GFX1200-GISEL: ; %bb.0:
-; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v1
-; GFX1200-GISEL-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2
-; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1200-LABEL: mul_u24_add64:
+; GFX1200: ; %bb.0:
+; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-NEXT: s_wait_expcnt 0x0
+; GFX1200-NEXT: s_wait_samplecnt 0x0
+; GFX1200-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-NEXT: s_wait_kmcnt 0x0
+; GFX1200-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3]
+; GFX1200-NEXT: s_setpc_b64 s[30:31]
%mul = call i64 @llvm.amdgcn.mul.u24.i64(i32 %x, i32 %y)
%add = add i64 %mul, %z
ret i64 %add
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 83599e789e10b9..84f23985b64213 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -1350,13 +1350,10 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
// Handle the case where the MVT/register class is omitted in the dest pattern
// but MVT exists in the source pattern.
- if (isa<UnsetInit>(DstChild.getLeafValue())) {
- for (const TreePatternNode &SrcChild : Src.children()) {
- if (SrcChild.getName() == DstChild.getName()) {
- DstMIBuilder.addRenderer<CopyRenderer>(SrcChild.getName());
- return InsertPt;
- }
- }
+ if (isa<UnsetInit>(DstChild.getLeafValue()) &&
+ Rule.hasOperand(DstChild.getName())) {
+ DstMIBuilder.addRenderer<CopyRenderer>(DstChild.getName());
+ return InsertPt;
}
return failedImport("Dst pattern child is an unsupported kind");
}
>From 4789a60a3be0d9417522d80564f28bfd0fd882da Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Tue, 17 Dec 2024 19:39:15 +0300
Subject: [PATCH 2/2] [TableGen][GISel] Learn to import patterns with
optional/physreg defs
---
.../Target/GlobalISel/SelectionDAGCompat.td | 2 +
.../test/CodeGen/X86/GlobalISel/mul-scalar.ll | 9 +-
.../select-intrinsic-x86-flags-read-u32.mir | 2 +-
.../Common/GlobalISelEmitterCommon.td | 3 +-
.../GlobalISelEmitter-implicit-defs.td | 62 +++-
.../GlobalISelEmitter-nested-subregs.td | 2 +-
.../TableGen/GlobalISelEmitterRegSequence.td | 2 +-
llvm/test/TableGen/GlobalISelEmitterSubreg.td | 8 +-
.../TableGen/Common/CodeGenRegisters.cpp | 2 +-
llvm/utils/TableGen/Common/CodeGenRegisters.h | 2 +-
.../GlobalISel/GlobalISelMatchTable.cpp | 3 +-
.../Common/GlobalISel/GlobalISelMatchTable.h | 6 +-
llvm/utils/TableGen/GlobalISelEmitter.cpp | 300 +++++++++---------
13 files changed, 225 insertions(+), 178 deletions(-)
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 2148f5be4c41aa..c8c0eeb57099a2 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -70,6 +70,8 @@ def : GINodeEquiv<G_SDIV, sdiv>;
def : GINodeEquiv<G_UDIV, udiv>;
def : GINodeEquiv<G_SREM, srem>;
def : GINodeEquiv<G_UREM, urem>;
+def : GINodeEquiv<G_SDIVREM, sdivrem>;
+def : GINodeEquiv<G_UDIVREM, udivrem>;
def : GINodeEquiv<G_AND, and>;
def : GINodeEquiv<G_OR, or>;
def : GINodeEquiv<G_XOR, xor>;
diff --git a/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll
index f401f45a06f6a7..3196668c70d8ec 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll
@@ -5,16 +5,17 @@
define i8 @test_mul_i8(i8 %arg1, i8 %arg2) nounwind {
; X64-LABEL: test_mul_i8:
; X64: # %bb.0:
-; X64-NEXT: movsbl %dil, %eax
-; X64-NEXT: imulb %sil
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: mulb %sil
; X64-NEXT: retq
;
; X86-LABEL: test_mul_i8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cbtw
-; X86-NEXT: imulb %cl
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: mulb %cl
; X86-NEXT: retl
%ret = mul i8 %arg1, %arg2
ret i8 %ret
diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-intrinsic-x86-flags-read-u32.mir b/llvm/test/CodeGen/X86/GlobalISel/select-intrinsic-x86-flags-read-u32.mir
index 332ec2240c5b60..3d1857a274b4b2 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/select-intrinsic-x86-flags-read-u32.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/select-intrinsic-x86-flags-read-u32.mir
@@ -9,7 +9,7 @@
define void @read_flags() { ret void }
; CHECK-LABEL: name: read_flags
; CHECK: bb.0:
- ; CHECK: [[RDFLAGS32_:%[0-9]+]]:gr32 = RDFLAGS32 implicit-def $esp, implicit $esp
+ ; CHECK: [[RDFLAGS32_:%[0-9]+]]:gr32 = RDFLAGS32 implicit-def dead $esp, implicit $esp
; CHECK: $eax = COPY [[RDFLAGS32_]]
...
diff --git a/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td b/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td
index 8f11fee3751844..cfcaf3c76bbf8a 100644
--- a/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td
+++ b/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td
@@ -7,7 +7,8 @@ class MyTargetGenericInstruction : GenericInstruction {
}
def R0 : Register<"r0"> { let Namespace = "MyTarget"; }
-def GPR32 : RegisterClass<"MyTarget", [i32], 32, (add R0)>;
+def R1 : Register<"r0"> { let Namespace = "MyTarget"; }
+def GPR32 : RegisterClass<"MyTarget", [i32], 32, (add R0, R1)>;
def GPR32Op : RegisterOperand<GPR32>;
def F0 : Register<"f0"> { let Namespace = "MyTarget"; }
def FPR32 : RegisterClass<"MyTarget", [f32], 32, (add F0)>;
diff --git a/llvm/test/TableGen/GlobalISelEmitter-implicit-defs.td b/llvm/test/TableGen/GlobalISelEmitter-implicit-defs.td
index 79af1a336f2890..ebf290a27b13ed 100644
--- a/llvm/test/TableGen/GlobalISelEmitter-implicit-defs.td
+++ b/llvm/test/TableGen/GlobalISelEmitter-implicit-defs.td
@@ -1,12 +1,60 @@
-// RUN: llvm-tblgen -gen-global-isel -warn-on-skipped-patterns -I %p/../../include -I %p/Common %s -o /dev/null 2>&1 < %s | FileCheck %s --implicit-check-not="Skipped pattern"
+// RUN: llvm-tblgen -gen-global-isel -I %p/../../include -I %p/Common %s | FileCheck %s
include "llvm/Target/Target.td"
include "GlobalISelEmitterCommon.td"
-// CHECK: Skipped pattern: Pattern defines a physical register
-let Uses = [B0], Defs = [B0] in
-def tst1 : I<(outs), (ins), [(set B0, (add B0, 1))]>;
+let Defs = [R0, R1] in
+def tst1 : I<(outs), (ins), [(set R0, (get_fpenv))]>;
-// CHECK: Skipped pattern: Src pattern result has 1 def(s) without the HasNoUse predicate set to true but Dst MI has no def
-let Uses = [B0] in
-def tst2 : I<(outs), (ins), [(set B0, (add B0, 1))]>;
+let Defs = [R0, R1] in
+def tst2 : I<(outs GPR32:$rd), (ins GPR32:$rs1, GPR32:$rs2),
+ [(set GPR32:$rd, R0, (udivrem i32:$rs1, i32:$rs2))]>;
+
+def : Pat<(sdiv i32:$rs1, i32:$rs2), (tst2 $rs1, $rs2)>;
+def : Pat<(sdivrem i32:$rs1, i32:$rs2), (tst2 $rs1, $rs2)>;
+
+// CHECK-LABEL: // (sdiv:{ *:[i32] } i32:{ *:[i32] }:$rs1, i32:{ *:[i32] }:$rs2) => (tst2:{ *:[i32] }:{ *:[i32] } ?:{ *:[i32] }:$rs1, ?:{ *:[i32] }:$rs2)
+// CHECK-NEXT: GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/GIMT_Encode2(MyTarget::tst2),
+// CHECK-NEXT: GIR_AddImplicitDef, /*InsnID*/0, GIMT_Encode2(MyTarget::R0), GIMT_Encode2(RegState::Dead),
+// CHECK-NEXT: GIR_AddImplicitDef, /*InsnID*/0, GIMT_Encode2(MyTarget::R1), GIMT_Encode2(RegState::Dead),
+// CHECK-NEXT: GIR_RootConstrainSelectedInstOperands,
+// CHECK-NEXT: // GIR_Coverage, 2,
+
+// CHECK-LABEL: // (sdivrem:{ *:[i32] }:{ *:[i32] } i32:{ *:[i32] }:$rs1, i32:{ *:[i32] }:$rs2) => (tst2:{ *:[i32] }:{ *:[i32] } ?:{ *:[i32] }:$rs1, ?:{ *:[i32] }:$rs2)
+// CHECK-NEXT: GIR_BuildRootMI, /*Opcode*/GIMT_Encode2(MyTarget::tst2),
+// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/0, // DstI[rd]
+// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/2, // rs1
+// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/3, // rs2
+// CHECK-NEXT: GIR_SetImplicitDefDead, /*InsnID*/0, /*OpIdx for MyTarget::R1*/1,
+// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::COPY),
+// CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/1, // DstI[R0]
+// CHECK-NEXT: GIR_AddRegister, /*InsnID*/1, GIMT_Encode2(MyTarget::R0), /*AddRegisterRegFlags*/GIMT_Encode2(0),
+// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, GIMT_Encode2(MyTarget::GPR32RegClassID),
+// CHECK-NEXT: GIR_RootConstrainSelectedInstOperands,
+// CHECK-NEXT: // GIR_Coverage, 3,
+// CHECK-NEXT: GIR_EraseRootFromParent_Done,
+
+// CHECK-LABEL: // (udivrem:{ *:[i32] }:{ *:[i32] } i32:{ *:[i32] }:$rs1, i32:{ *:[i32] }:$rs2) => (tst2:{ *:[i32] }:{ *:[i32] } i32:{ *:[i32] }:$rs1, i32:{ *:[i32] }:$rs2)
+// CHECK-NEXT: GIR_BuildRootMI, /*Opcode*/GIMT_Encode2(MyTarget::tst2),
+// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/0, // DstI[rd]
+// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/2, // rs1
+// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/3, // rs2
+// CHECK-NEXT: GIR_SetImplicitDefDead, /*InsnID*/0, /*OpIdx for MyTarget::R1*/1,
+// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::COPY),
+// CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/1, // DstI[R0]
+// CHECK-NEXT: GIR_AddRegister, /*InsnID*/1, GIMT_Encode2(MyTarget::R0), /*AddRegisterRegFlags*/GIMT_Encode2(0),
+// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, GIMT_Encode2(MyTarget::GPR32RegClassID),
+// CHECK-NEXT: GIR_RootConstrainSelectedInstOperands,
+// CHECK-NEXT: // GIR_Coverage, 1,
+// CHECK-NEXT: GIR_EraseRootFromParent_Done,
+
+// CHECK-LABEL: // (get_fpenv:{ *:[i32] }) => (tst1:{ *:[i32] })
+// CHECK-NEXT: GIR_BuildRootMI, /*Opcode*/GIMT_Encode2(MyTarget::tst1),
+// CHECK-NEXT: GIR_SetImplicitDefDead, /*InsnID*/0, /*OpIdx for MyTarget::R1*/1,
+// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::COPY),
+// CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/0, // DstI[R0]
+// CHECK-NEXT: GIR_AddRegister, /*InsnID*/1, GIMT_Encode2(MyTarget::R0), /*AddRegisterRegFlags*/GIMT_Encode2(0),
+// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, GIMT_Encode2(MyTarget::GPR32RegClassID),
+// CHECK-NEXT: GIR_RootConstrainSelectedInstOperands,
+// CHECK-NEXT: // GIR_Coverage, 0,
+// CHECK-NEXT: GIR_EraseRootFromParent_Done,
diff --git a/llvm/test/TableGen/GlobalISelEmitter-nested-subregs.td b/llvm/test/TableGen/GlobalISelEmitter-nested-subregs.td
index 1fdb973c1f1ec7..79e55ef2e8b8ce 100644
--- a/llvm/test/TableGen/GlobalISelEmitter-nested-subregs.td
+++ b/llvm/test/TableGen/GlobalISelEmitter-nested-subregs.td
@@ -38,11 +38,11 @@ def A0 : RegisterClass<"MyTarget", [i32], 32, (add a0)>;
// CHECK-NEXT: // MIs[0] src
// CHECK-NEXT: GIM_RootCheckType, /*Op*/1, /*Type*/GILLT_s8,
// CHECK-NEXT: // (anyext:{ *:[i16] } i8:{ *:[i8] }:$src) => (EXTRACT_SUBREG:{ *:[i16] } (INSERT_SUBREG:{ *:[i32] } (IMPLICIT_DEF:{ *:[i32] }), A0b:{ *:[i8] }:$src, lo8:{ *:[i32] }), lo16:{ *:[i32] })
-// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s32,
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/2, /*Opcode*/GIMT_Encode2(TargetOpcode::IMPLICIT_DEF),
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/2,
+// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::INSERT_SUBREG),
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
// CHECK-NEXT: GIR_AddSimpleTempRegister, /*InsnID*/1, /*TempRegID*/1,
diff --git a/llvm/test/TableGen/GlobalISelEmitterRegSequence.td b/llvm/test/TableGen/GlobalISelEmitterRegSequence.td
index 3829070b28efeb..69f82eac49c161 100644
--- a/llvm/test/TableGen/GlobalISelEmitterRegSequence.td
+++ b/llvm/test/TableGen/GlobalISelEmitterRegSequence.td
@@ -39,12 +39,12 @@ def SUBSOME_INSN : I<(outs SRegs:$dst), (ins SOP:$src), []>;
// CHECK-NEXT: GIM_RootCheckType, /*Op*/1, /*Type*/GILLT_s16,
// CHECK-NEXT: GIM_RootCheckRegBankForClass, /*Op*/1, /*RC*/GIMT_Encode2(Test::SRegsRegClassID),
// CHECK-NEXT: // (sext:{ *:[i32] } SOP:{ *:[i16] }:$src) => (REG_SEQUENCE:{ *:[i32] } DRegs:{ *:[i32] }, (SUBSOME_INSN:{ *:[i16] } SOP:{ *:[i16] }:$src), sub0:{ *:[i32] }, (SUBSOME_INSN:{ *:[i16] } SOP:{ *:[i16] }:$src), sub1:{ *:[i32] })
-// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s16,
// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s16,
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/2, /*Opcode*/GIMT_Encode2(MyTarget::SUBSOME_INSN),
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
// CHECK-NEXT: GIR_Copy, /*NewInsnID*/2, /*OldInsnID*/0, /*OpIdx*/1, // src
// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/2,
+// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s16,
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(MyTarget::SUBSOME_INSN),
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
// CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/1, // src
diff --git a/llvm/test/TableGen/GlobalISelEmitterSubreg.td b/llvm/test/TableGen/GlobalISelEmitterSubreg.td
index 8df3238f6cc21e..08e690f3e894de 100644
--- a/llvm/test/TableGen/GlobalISelEmitterSubreg.td
+++ b/llvm/test/TableGen/GlobalISelEmitterSubreg.td
@@ -59,13 +59,13 @@ def : Pat<(sub (complex DOP:$src1, DOP:$src2), 77),
(SOME_INSN2 (EXTRACT_SUBREG DOP:$src1, sub0),
(EXTRACT_SUBREG DOP:$src2, sub1))>;
// CHECK-LABEL: // (sub:{ *:[i32] } (complex:{ *:[i32] } DOP:{ *:[i32] }:$src1, DOP:{ *:[i32] }:$src2), 77:{ *:[i32] }) => (SOME_INSN2:{ *:[i32] } (EXTRACT_SUBREG:{ *:[i32] } DOP:{ *:[i32] }:$src1, sub0:{ *:[i32] }), (EXTRACT_SUBREG:{ *:[i32] } DOP:{ *:[i32] }:$src2, sub1:{ *:[i32] }))
-// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s32,
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/2, /*Opcode*/GIMT_Encode2(TargetOpcode::COPY),
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
// CHECK-NEXT: GIR_ComplexSubOperandSubRegRenderer, /*InsnID*/2, /*RendererID*/GIMT_Encode2(0), /*SubOperand*/1, /*SubRegIdx*/GIMT_Encode2(2), // src2
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/2, /*Op*/0, GIMT_Encode2(Test::SRegsRegClassID),
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/2, /*Op*/1, GIMT_Encode2(Test::DRegsRegClassID),
+// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::COPY),
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
// CHECK-NEXT: GIR_ComplexSubOperandSubRegRenderer, /*InsnID*/1, /*RendererID*/GIMT_Encode2(0), /*SubOperand*/0, /*SubRegIdx*/GIMT_Encode2(1), // src1
@@ -103,11 +103,11 @@ def : Pat<(i32 (anyext i16:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SOP:$src
// instruction.
def : Pat<(i32 (anyext i16:$src)), (SOME_INSN (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SOP:$src, sub0))>;
// CHECK-LABEL: (anyext:{ *:[i32] } i16:{ *:[i16] }:$src) => (SOME_INSN:{ *:[i32] } (INSERT_SUBREG:{ *:[i32] } (IMPLICIT_DEF:{ *:[i32] }), SOP:{ *:[i16] }:$src, sub0:{ *:[i32] }))
-// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s32,
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/2, /*Opcode*/GIMT_Encode2(TargetOpcode::IMPLICIT_DEF),
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/2,
+// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::INSERT_SUBREG),
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
// CHECK-NEXT: GIR_AddSimpleTempRegister, /*InsnID*/1, /*TempRegID*/1,
@@ -138,12 +138,12 @@ def : Pat<(i32 (anyext i16:$src)), (INSERT_SUBREG (i32 (COPY_TO_REGCLASS SOP:$sr
// by a subinstruction.
def : Pat<(i32 (anyext i16:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), (SUBSOME_INSN SOP:$src), sub0)>;
// CHECK-LABEL: (anyext:{ *:[i32] } i16:{ *:[i16] }:$src) => (INSERT_SUBREG:{ *:[i32] } (IMPLICIT_DEF:{ *:[i32] }), (SUBSOME_INSN:{ *:[i16] } SOP:{ *:[i16] }:$src), sub0:{ *:[i32] })
-// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s16,
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/2, /*Opcode*/GIMT_Encode2(MyTarget::SUBSOME_INSN),
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
// CHECK-NEXT: GIR_Copy, /*NewInsnID*/2, /*OldInsnID*/0, /*OpIdx*/1, // src
// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/2,
+// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::IMPLICIT_DEF),
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/1,
@@ -200,12 +200,12 @@ def : Pat<(i16 (trunc (bitreverse DOP:$src))),
// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/1, /*Op*/1, /*RC*/GIMT_Encode2(Test::DRegsRegClassID),
// CHECK-NEXT: GIM_CheckIsSafeToFold, /*NumInsns*/1,
// CHECK-NEXT: // (trunc:{ *:[i16] } (ctpop:{ *:[i32] } DOP:{ *:[i32] }:$src)) => (SUBSOME_INSN2:{ *:[i16] } (EXTRACT_SUBREG:{ *:[i16] } (SOME_INSN:{ *:[i32] } DOP:{ *:[i32] }:$src), sub0:{ *:[i32] }))
-// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s16,
// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s32,
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/2, /*Opcode*/GIMT_Encode2(MyTarget::SOME_INSN),
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
// CHECK-NEXT: GIR_Copy, /*NewInsnID*/2, /*OldInsnID*/1, /*OpIdx*/1, // src
// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/2,
+// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s16,
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::COPY),
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
// CHECK-NEXT: GIR_AddTempSubRegister, /*InsnID*/1, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(0), GIMT_Encode2(sub0),
diff --git a/llvm/utils/TableGen/Common/CodeGenRegisters.cpp b/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
index 2dbee94d7e5406..011d11184c70c7 100644
--- a/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
@@ -2494,7 +2494,7 @@ CodeGenRegBank::getRegClassForRegister(const Record *R) {
const CodeGenRegisterClass *
CodeGenRegBank::getMinimalPhysRegClass(const Record *RegRecord,
- ValueTypeByHwMode *VT) {
+ const ValueTypeByHwMode *VT) {
const CodeGenRegister *Reg = getReg(RegRecord);
const CodeGenRegisterClass *BestRC = nullptr;
for (const auto &RC : getRegClasses()) {
diff --git a/llvm/utils/TableGen/Common/CodeGenRegisters.h b/llvm/utils/TableGen/Common/CodeGenRegisters.h
index 2fa6cab2afb892..90489cae6164ba 100644
--- a/llvm/utils/TableGen/Common/CodeGenRegisters.h
+++ b/llvm/utils/TableGen/Common/CodeGenRegisters.h
@@ -792,7 +792,7 @@ class CodeGenRegBank {
// with a matching type
const CodeGenRegisterClass *
getMinimalPhysRegClass(const Record *RegRecord,
- ValueTypeByHwMode *VT = nullptr);
+ const ValueTypeByHwMode *VT = nullptr);
// Get the sum of unit weights.
unsigned getRegUnitSetWeight(const std::vector<unsigned> &Units) const {
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
index 15ec7e17130de4..6039211bc6cf00 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
@@ -1994,7 +1994,8 @@ void AddRegisterRenderer::emitRenderOpcodes(MatchTable &Table,
// really needed for a physical register reference. We can pack the
// register and flags in a single field.
if (IsDef)
- Table << MatchTable::NamedValue(2, "RegState::Define");
+ Table << MatchTable::NamedValue(
+ 2, IsDead ? "RegState::Define | RegState::Dead" : "RegState::Define");
else
Table << MatchTable::IntValue(2, 0);
Table << MatchTable::LineBreak;
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
index 00fe073057c5c9..48ce71be677c08 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
@@ -2091,13 +2091,15 @@ class AddRegisterRenderer : public OperandRenderer {
unsigned InsnID;
const Record *RegisterDef;
bool IsDef;
+ bool IsDead;
const CodeGenTarget &Target;
public:
AddRegisterRenderer(unsigned InsnID, const CodeGenTarget &Target,
- const Record *RegisterDef, bool IsDef = false)
+ const Record *RegisterDef, bool IsDef = false,
+ bool IsDead = false)
: OperandRenderer(OR_Register), InsnID(InsnID), RegisterDef(RegisterDef),
- IsDef(IsDef), Target(Target) {}
+ IsDef(IsDef), IsDead(IsDead), Target(Target) {}
static bool classof(const OperandRenderer *R) {
return R->getKind() == OR_Register;
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 84f23985b64213..4fa64248878b40 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -324,8 +324,6 @@ class GlobalISelEmitter final : public GlobalISelMatchTableExecutorEmitter {
void emitTestSimplePredicate(raw_ostream &OS) override;
void emitRunCustomAction(raw_ostream &OS) override;
- void postProcessRule(RuleMatcher &M);
-
const CodeGenTarget &getTarget() const override { return Target; }
StringRef getClassName() const override { return ClassName; }
@@ -384,44 +382,41 @@ class GlobalISelEmitter final : public GlobalISelMatchTableExecutorEmitter {
Error importRulePredicates(RuleMatcher &M,
ArrayRef<const Record *> Predicates);
- Expected<InstructionMatcher &>
- createAndImportSelDAGMatcher(RuleMatcher &Rule,
- InstructionMatcher &InsnMatcher,
- const TreePatternNode &Src, unsigned &TempOpIdx);
+ Expected<InstructionMatcher &> createAndImportSelDAGMatcher(
+ RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
+ const PatternToMatch &P, const TreePatternNode &Src, unsigned &TempOpIdx);
Error importComplexPatternOperandMatcher(OperandMatcher &OM, const Record *R,
unsigned &TempOpIdx) const;
Error importChildMatcher(RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
+ const PatternToMatch &P,
const TreePatternNode &SrcChild,
bool OperandIsAPointer, bool OperandIsImmArg,
unsigned OpIdx, unsigned &TempOpIdx);
Expected<BuildMIAction &> createAndImportInstructionRenderer(
- RuleMatcher &M, InstructionMatcher &InsnMatcher,
+ RuleMatcher &M, InstructionMatcher &InsnMatcher, const PatternToMatch &P,
const TreePatternNode &Src, const TreePatternNode &Dst);
Expected<action_iterator> createAndImportSubInstructionRenderer(
- action_iterator InsertPt, RuleMatcher &M, const TreePatternNode &Dst,
- const TreePatternNode &Src, unsigned TempReg);
+ action_iterator InsertPt, RuleMatcher &M, const PatternToMatch &P,
+ const TreePatternNode &Dst, unsigned TempReg);
Expected<action_iterator>
createInstructionRenderer(action_iterator InsertPt, RuleMatcher &M,
const TreePatternNode &Dst);
Expected<action_iterator>
- importExplicitDefRenderers(action_iterator InsertPt, RuleMatcher &M,
- BuildMIAction &DstMIBuilder,
- const TreePatternNode &Src,
- const TreePatternNode &Dst, unsigned Start = 0);
+ importDefRenderers(action_iterator InsertPt, RuleMatcher &M,
+ BuildMIAction &DstMIBuilder, const PatternToMatch &P,
+ const TreePatternNode &Dst, unsigned Start = 0);
Expected<action_iterator> importExplicitUseRenderers(
action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
- const llvm::TreePatternNode &Dst, const TreePatternNode &Src);
+ const PatternToMatch &P, const TreePatternNode &Dst);
Expected<action_iterator> importExplicitUseRenderer(
action_iterator InsertPt, RuleMatcher &Rule, BuildMIAction &DstMIBuilder,
- const TreePatternNode &DstChild, const TreePatternNode &Src);
+ const PatternToMatch &P, const TreePatternNode &Dst);
Error importDefaultOperandRenderers(action_iterator InsertPt, RuleMatcher &M,
BuildMIAction &DstMIBuilder,
const DAGDefaultOperand &DefaultOp) const;
- Error importImplicitDefRenderers(BuildMIAction &DstMIBuilder,
- ArrayRef<const Record *> ImplicitDefs) const;
/// Analyze pattern \p P, returning a matcher for it if possible.
/// Otherwise, return an Error explaining why we don't support it.
@@ -725,7 +720,7 @@ Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
}
Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
- RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
+ RuleMatcher &Rule, InstructionMatcher &InsnMatcher, const PatternToMatch &P,
const TreePatternNode &Src, unsigned &TempOpIdx) {
const auto SavedFlags = Rule.setGISelFlags(Src.getGISelFlagsRecord());
@@ -925,9 +920,9 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
OperandIsImmArg |= II->isParamImmArg(I - 1);
}
- if (auto Error =
- importChildMatcher(Rule, InsnMatcher, SrcChild, OperandIsAPointer,
- OperandIsImmArg, OpIdx++, TempOpIdx))
+ if (auto Error = importChildMatcher(Rule, InsnMatcher, P, SrcChild,
+ OperandIsAPointer, OperandIsImmArg,
+ OpIdx++, TempOpIdx))
return std::move(Error);
}
}
@@ -966,7 +961,7 @@ static StringRef getSrcChildName(const TreePatternNode &SrcChild,
}
Error GlobalISelEmitter::importChildMatcher(
- RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
+ RuleMatcher &Rule, InstructionMatcher &InsnMatcher, const PatternToMatch &P,
const TreePatternNode &SrcChild, bool OperandIsAPointer,
bool OperandIsImmArg, unsigned OpIdx, unsigned &TempOpIdx) {
@@ -1087,7 +1082,7 @@ Error GlobalISelEmitter::importChildMatcher(
// Map the node to a gMIR instruction.
InstructionOperandMatcher &InsnOperand = **MaybeInsnOperand;
auto InsnMatcherOrError = createAndImportSelDAGMatcher(
- Rule, InsnOperand.getInsnMatcher(), SrcChild, TempOpIdx);
+ Rule, InsnOperand.getInsnMatcher(), P, SrcChild, TempOpIdx);
if (auto Error = InsnMatcherOrError.takeError())
return Error;
@@ -1178,8 +1173,8 @@ Error GlobalISelEmitter::importChildMatcher(
// has to succeed.
OperandMatcher &OM =
InsnOperand.getInsnMatcher().addOperand(0, "", TempOpIdx);
- if (auto Error =
- OM.addTypeCheckPredicate(TypeSetByHwMode(VTy), false /* OperandIsAPointer */))
+ if (auto Error = OM.addTypeCheckPredicate(TypeSetByHwMode(VTy),
+ false /* OperandIsAPointer */))
return failedImport(toString(std::move(Error)) +
" for result of Src pattern operator");
@@ -1198,23 +1193,22 @@ Error GlobalISelEmitter::importChildMatcher(
Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
action_iterator InsertPt, RuleMatcher &Rule, BuildMIAction &DstMIBuilder,
- const TreePatternNode &DstChild, const TreePatternNode &Src) {
+ const PatternToMatch &P, const TreePatternNode &Dst) {
- const auto &SubOperand = Rule.getComplexSubOperand(DstChild.getName());
+ const auto &SubOperand = Rule.getComplexSubOperand(Dst.getName());
if (SubOperand) {
DstMIBuilder.addRenderer<RenderComplexPatternOperand>(
- *std::get<0>(*SubOperand), DstChild.getName(), std::get<1>(*SubOperand),
+ *std::get<0>(*SubOperand), Dst.getName(), std::get<1>(*SubOperand),
std::get<2>(*SubOperand));
return InsertPt;
}
- if (!DstChild.isLeaf()) {
- if (DstChild.getOperator()->isSubClassOf("SDNodeXForm")) {
- auto &Child = DstChild.getChild(0);
- auto I = SDNodeXFormEquivs.find(DstChild.getOperator());
+ if (!Dst.isLeaf()) {
+ if (Dst.getOperator()->isSubClassOf("SDNodeXForm")) {
+ auto &Child = Dst.getChild(0);
+ auto I = SDNodeXFormEquivs.find(Dst.getOperator());
if (I != SDNodeXFormEquivs.end()) {
- const Record *XFormOpc =
- DstChild.getOperator()->getValueAsDef("Opcode");
+ const Record *XFormOpc = Dst.getOperator()->getValueAsDef("Opcode");
if (XFormOpc->getName() == "timm") {
// If this is a TargetConstant, there won't be a corresponding
// instruction to transform. Instead, this will refer directly to an
@@ -1233,10 +1227,10 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
// We accept 'bb' here. It's an operator because BasicBlockSDNode isn't
// inline, but in MI it's just another operand.
- if (DstChild.getOperator()->isSubClassOf("SDNode")) {
- auto &ChildSDNI = CGP.getSDNodeInfo(DstChild.getOperator());
+ if (Dst.getOperator()->isSubClassOf("SDNode")) {
+ auto &ChildSDNI = CGP.getSDNodeInfo(Dst.getOperator());
if (ChildSDNI.getSDClassName() == "BasicBlockSDNode") {
- DstMIBuilder.addRenderer<CopyRenderer>(DstChild.getName());
+ DstMIBuilder.addRenderer<CopyRenderer>(Dst.getName());
return InsertPt;
}
}
@@ -1245,26 +1239,25 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
// rendered as operands.
// FIXME: The target should be able to choose sign-extended when appropriate
// (e.g. on Mips).
- if (DstChild.getOperator()->getName() == "timm") {
- DstMIBuilder.addRenderer<CopyRenderer>(DstChild.getName());
+ if (Dst.getOperator()->getName() == "timm") {
+ DstMIBuilder.addRenderer<CopyRenderer>(Dst.getName());
return InsertPt;
}
- if (DstChild.getOperator()->getName() == "tframeindex") {
- DstMIBuilder.addRenderer<CopyRenderer>(DstChild.getName());
+ if (Dst.getOperator()->getName() == "tframeindex") {
+ DstMIBuilder.addRenderer<CopyRenderer>(Dst.getName());
return InsertPt;
}
- if (DstChild.getOperator()->getName() == "imm") {
- DstMIBuilder.addRenderer<CopyConstantAsImmRenderer>(DstChild.getName());
+ if (Dst.getOperator()->getName() == "imm") {
+ DstMIBuilder.addRenderer<CopyConstantAsImmRenderer>(Dst.getName());
return InsertPt;
}
- if (DstChild.getOperator()->getName() == "fpimm") {
- DstMIBuilder.addRenderer<CopyFConstantAsFPImmRenderer>(
- DstChild.getName());
+ if (Dst.getOperator()->getName() == "fpimm") {
+ DstMIBuilder.addRenderer<CopyFConstantAsFPImmRenderer>(Dst.getName());
return InsertPt;
}
- if (DstChild.getOperator()->isSubClassOf("Instruction")) {
- auto OpTy = getInstResultType(DstChild, Target);
+ if (Dst.getOperator()->isSubClassOf("Instruction")) {
+ auto OpTy = getInstResultType(Dst, Target);
if (!OpTy)
return OpTy.takeError();
@@ -1274,29 +1267,28 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
auto InsertPtOrError = createAndImportSubInstructionRenderer(
- ++InsertPt, Rule, DstChild, Src, TempRegID);
+ ++InsertPt, Rule, P, Dst, TempRegID);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
return InsertPtOrError.get();
}
return failedImport("Dst pattern child isn't a leaf node or an MBB" +
- llvm::to_string(DstChild));
+ llvm::to_string(Dst));
}
// It could be a specific immediate in which case we should just check for
// that immediate.
- if (const IntInit *ChildIntInit =
- dyn_cast<IntInit>(DstChild.getLeafValue())) {
+ if (const IntInit *ChildIntInit = dyn_cast<IntInit>(Dst.getLeafValue())) {
DstMIBuilder.addRenderer<ImmRenderer>(ChildIntInit->getValue());
return InsertPt;
}
// Otherwise, we're looking for a bog-standard RegisterClass operand.
- if (auto *ChildDefInit = dyn_cast<DefInit>(DstChild.getLeafValue())) {
+ if (auto *ChildDefInit = dyn_cast<DefInit>(Dst.getLeafValue())) {
auto *ChildRec = ChildDefInit->getDef();
- ArrayRef<TypeSetByHwMode> ChildTypes = DstChild.getExtTypes();
+ ArrayRef<TypeSetByHwMode> ChildTypes = Dst.getExtTypes();
if (ChildTypes.size() != 1)
return failedImport("Dst pattern child has multiple results");
@@ -1317,11 +1309,11 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
if (ChildRec->isSubClassOf("RegisterOperand") &&
!ChildRec->isValueUnset("GIZeroRegister")) {
DstMIBuilder.addRenderer<CopyOrAddZeroRegRenderer>(
- DstChild.getName(), ChildRec->getValueAsDef("GIZeroRegister"));
+ Dst.getName(), ChildRec->getValueAsDef("GIZeroRegister"));
return InsertPt;
}
- DstMIBuilder.addRenderer<CopyRenderer>(DstChild.getName());
+ DstMIBuilder.addRenderer<CopyRenderer>(Dst.getName());
return InsertPt;
}
@@ -1337,9 +1329,9 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
return failedImport(
"SelectionDAG ComplexPattern not mapped to GlobalISel");
- const OperandMatcher &OM = Rule.getOperandMatcher(DstChild.getName());
+ const OperandMatcher &OM = Rule.getOperandMatcher(Dst.getName());
DstMIBuilder.addRenderer<RenderComplexPatternOperand>(
- *ComplexPattern->second, DstChild.getName(),
+ *ComplexPattern->second, Dst.getName(),
OM.getAllocatedTemporariesBaseID());
return InsertPt;
}
@@ -1350,17 +1342,16 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
// Handle the case where the MVT/register class is omitted in the dest pattern
// but MVT exists in the source pattern.
- if (isa<UnsetInit>(DstChild.getLeafValue()) &&
- Rule.hasOperand(DstChild.getName())) {
- DstMIBuilder.addRenderer<CopyRenderer>(DstChild.getName());
+ if (isa<UnsetInit>(Dst.getLeafValue()) && Rule.hasOperand(Dst.getName())) {
+ DstMIBuilder.addRenderer<CopyRenderer>(Dst.getName());
return InsertPt;
}
return failedImport("Dst pattern child is an unsupported kind");
}
Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
- RuleMatcher &M, InstructionMatcher &InsnMatcher, const TreePatternNode &Src,
- const TreePatternNode &Dst) {
+ RuleMatcher &M, InstructionMatcher &InsnMatcher, const PatternToMatch &P,
+ const TreePatternNode &Src, const TreePatternNode &Dst) {
auto InsertPtOrError = createInstructionRenderer(M.actions_end(), M, Dst);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
@@ -1380,13 +1371,11 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
}
if (auto Error =
- importExplicitDefRenderers(InsertPt, M, DstMIBuilder, Src, Dst)
- .takeError())
+ importDefRenderers(InsertPt, M, DstMIBuilder, P, Dst).takeError())
return std::move(Error);
- if (auto Error =
- importExplicitUseRenderers(InsertPt, M, DstMIBuilder, Dst, Src)
- .takeError())
+ if (auto Error = importExplicitUseRenderers(InsertPt, M, DstMIBuilder, P, Dst)
+ .takeError())
return std::move(Error);
return DstMIBuilder;
@@ -1394,8 +1383,8 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
Expected<action_iterator>
GlobalISelEmitter::createAndImportSubInstructionRenderer(
- const action_iterator InsertPt, RuleMatcher &M, const TreePatternNode &Dst,
- const TreePatternNode &Src, unsigned TempRegID) {
+ const action_iterator InsertPt, RuleMatcher &M, const PatternToMatch &P,
+ const TreePatternNode &Dst, unsigned TempRegID) {
auto InsertPtOrError = createInstructionRenderer(InsertPt, M, Dst);
// TODO: Assert there's exactly one result.
@@ -1410,15 +1399,13 @@ GlobalISelEmitter::createAndImportSubInstructionRenderer(
DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID, true);
// Handle additional (ignored) results.
- if (DstMIBuilder.getCGI()->Operands.NumDefs > 1) {
- InsertPtOrError = importExplicitDefRenderers(
- std::prev(*InsertPtOrError), M, DstMIBuilder, Src, Dst, /*Start=*/1);
- if (auto Error = InsertPtOrError.takeError())
- return std::move(Error);
- }
+ InsertPtOrError = importDefRenderers(std::prev(*InsertPtOrError), M,
+ DstMIBuilder, P, Dst, /*Start=*/1);
+ if (auto Error = InsertPtOrError.takeError())
+ return std::move(Error);
InsertPtOrError = importExplicitUseRenderers(InsertPtOrError.get(), M,
- DstMIBuilder, Dst, Src);
+ DstMIBuilder, P, Dst);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
@@ -1450,29 +1437,39 @@ Expected<action_iterator> GlobalISelEmitter::createInstructionRenderer(
DstI);
}
-Expected<action_iterator> GlobalISelEmitter::importExplicitDefRenderers(
+Expected<action_iterator> GlobalISelEmitter::importDefRenderers(
action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
- const TreePatternNode &Src, const TreePatternNode &Dst, unsigned Start) {
+ const PatternToMatch &P, const TreePatternNode &Dst, unsigned Start) {
const CodeGenInstruction *DstI = DstMIBuilder.getCGI();
- const unsigned SrcNumDefs = Src.getExtTypes().size();
- const unsigned DstNumDefs = DstI->Operands.NumDefs;
- if (DstNumDefs == 0)
- return InsertPt;
-
- for (unsigned I = Start; I < SrcNumDefs; ++I) {
- std::string OpName = getMangledRootDefName(DstI->Operands[I].Name);
- // CopyRenderer saves a StringRef, so cannot pass OpName itself -
- // let's use a string with an appropriate lifetime.
- StringRef PermanentRef = M.getOperandMatcher(OpName).getSymbolicName();
- DstMIBuilder.addRenderer<CopyRenderer>(PermanentRef);
- }
+ const unsigned DstExpDefs = DstI->Operands.NumDefs;
+ const unsigned DstNumDefs = DstExpDefs + DstI->ImplicitDefs.size();
+ bool IsRoot = &Dst == &P.getDstPattern();
+
+ unsigned I = Start;
+ for (; I < DstExpDefs; ++I) {
+ const CGIOperandList::OperandInfo &OpInfo = DstI->Operands[I];
+ std::string OpName = getMangledRootDefName(OpInfo.Name);
+
+ if (M.hasOperand(OpName)) {
+ // CopyRenderer saves a StringRef, so cannot pass OpName itself -
+ // let's use a string with an appropriate lifetime.
+ StringRef PermanentRef = M.getOperandMatcher(OpName).getSymbolicName();
+ DstMIBuilder.addRenderer<CopyRenderer>(PermanentRef);
+ continue;
+ }
- // Some instructions have multiple defs, but are missing a type entry
- // (e.g. s_cc_out operands).
- if (Dst.getExtTypes().size() < DstNumDefs)
- return failedImport("unhandled discarded def");
+ if (OpInfo.Rec->isSubClassOf("OptionalDefOperand")) {
+ const DAGDefaultOperand &ComplexOp = CGP.getDefaultOperand(OpInfo.Rec);
+ for (const TreePatternNode &SubOp :
+ make_pointee_range(ComplexOp.DefaultOps)) {
+ const Record *Reg = cast<DefInit>(SubOp.getLeafValue())->getDef();
+ assert(Reg->isSubClassOf("Register"));
+ DstMIBuilder.addRenderer<AddRegisterRenderer>(
+ Target, Reg, /*IsDef=*/true, /*IsDead=*/true);
+ }
+ continue;
+ }
- for (unsigned I = SrcNumDefs; I < DstNumDefs; ++I) {
const TypeSetByHwMode &ExtTy = Dst.getExtType(I);
if (!ExtTy.isMachineValueType())
return failedImport("unsupported typeset");
@@ -1484,7 +1481,30 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitDefRenderers(
unsigned TempRegID = M.allocateTempRegID();
InsertPt =
M.insertAction<MakeTempRegisterAction>(InsertPt, *OpTy, TempRegID);
- DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID, true, nullptr, true);
+ DstMIBuilder.addRenderer<TempRegRenderer>(
+ TempRegID, /*IsDef=*/true, /*SubReg=*/nullptr, /*IsDead=*/true);
+ }
+
+ for (; I < DstNumDefs; ++I) {
+ const Record *Reg = DstI->ImplicitDefs[I - DstExpDefs];
+ std::string OpName = getMangledRootDefName(Reg->getName());
+
+ if (!IsRoot || !M.hasOperand(OpName)) {
+ DstMIBuilder.setDeadImplicitDef(Reg);
+ continue;
+ }
+
+ BuildMIAction &CopyBuilder = M.addAction<BuildMIAction>(
+ M.allocateOutputInsnID(), &Target.getInstruction(RK.getDef("COPY")));
+
+ StringRef PermanentRef = M.getOperandMatcher(OpName).getSymbolicName();
+ CopyBuilder.addRenderer<CopyRenderer>(PermanentRef);
+ CopyBuilder.addRenderer<AddRegisterRenderer>(Target, Reg);
+
+ const CodeGenRegisterClass *RC = CGRegs.getRegClassForRegister(Reg);
+ assert(RC);
+ M.addAction<ConstrainOperandToRegClassAction>(CopyBuilder.getInsnID(),
+ /*OpIdx=*/0, *RC);
}
return InsertPt;
@@ -1492,7 +1512,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitDefRenderers(
Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
- const llvm::TreePatternNode &Dst, const llvm::TreePatternNode &Src) {
+ const PatternToMatch &P, const TreePatternNode &Dst) {
const CodeGenInstruction *DstI = DstMIBuilder.getCGI();
CodeGenInstruction *OrigDstI = &Target.getInstruction(Dst.getOperator());
@@ -1522,7 +1542,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
TempRegID);
auto InsertPtOrError = createAndImportSubInstructionRenderer(
- ++InsertPt, M, ValChild, Src, TempRegID);
+ ++InsertPt, M, P, ValChild, TempRegID);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
@@ -1580,7 +1600,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef());
auto InsertPtOrError =
- importExplicitUseRenderer(InsertPt, M, DstMIBuilder, ValChild, Src);
+ importExplicitUseRenderer(InsertPt, M, DstMIBuilder, P, ValChild);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
InsertPt = InsertPtOrError.get();
@@ -1649,7 +1669,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
}
auto InsertPtOrError = importExplicitUseRenderer(InsertPt, M, DstMIBuilder,
- Dst.getChild(Child), Src);
+ P, Dst.getChild(Child));
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
InsertPt = InsertPtOrError.get();
@@ -1707,13 +1727,6 @@ Error GlobalISelEmitter::importDefaultOperandRenderers(
return Error::success();
}
-Error GlobalISelEmitter::importImplicitDefRenderers(
- BuildMIAction &DstMIBuilder, ArrayRef<const Record *> ImplicitDefs) const {
- if (!ImplicitDefs.empty())
- return failedImport("Pattern defines a physical register");
- return Error::success();
-}
-
Error GlobalISelEmitter::constrainOperands(action_iterator InsertPt,
RuleMatcher &M, unsigned InsnID,
const TreePatternNode &Dst) {
@@ -2039,7 +2052,7 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
const auto SavedFlags = M.setGISelFlags(P.getSrcRecord());
auto InsnMatcherOrError =
- createAndImportSelDAGMatcher(M, InsnMatcherTemp, Src, TempOpIdx);
+ createAndImportSelDAGMatcher(M, InsnMatcherTemp, P, Src, TempOpIdx);
if (auto Error = InsnMatcherOrError.takeError())
return std::move(Error);
InstructionMatcher &InsnMatcher = InsnMatcherOrError.get();
@@ -2090,13 +2103,14 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
unsigned DstExpDefs = DstI.Operands.NumDefs,
DstNumDefs = DstI.ImplicitDefs.size() + DstExpDefs,
SrcNumDefs = Src.getExtTypes().size();
+
+ bool FoundNoUsePred = false;
if (DstNumDefs < SrcNumDefs) {
if (DstNumDefs != 0)
return failedImport("Src pattern result has more defs than dst MI (" +
to_string(SrcNumDefs) + " def(s) vs " +
to_string(DstNumDefs) + " def(s))");
- bool FoundNoUsePred = false;
for (const auto &Pred : InsnMatcher.predicates()) {
if ((FoundNoUsePred = isa<NoUsePredicateMatcher>(Pred.get())))
break;
@@ -2109,15 +2123,24 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
// The root of the match also has constraints on the register bank so that it
// matches the result instruction.
- unsigned N = std::min(DstExpDefs, SrcNumDefs);
- for (unsigned I = 0; I < N; ++I) {
- const auto &DstIOperand = DstI.Operands[I];
+ for (unsigned I = 0; I < SrcNumDefs; ++I) {
+ if (FoundNoUsePred)
+ continue;
OperandMatcher &OM = InsnMatcher.getOperand(I);
+
+ if (I >= DstExpDefs) {
+ const Record *Reg = DstI.ImplicitDefs[I - DstExpDefs];
+ OM.setSymbolicName(getMangledRootDefName(Reg->getName()));
+ M.defineOperand(OM.getSymbolicName(), OM);
+ continue;
+ }
+
// The operand names declared in the DstI instruction are unrelated to
// those used in pattern's source and destination DAGs, so mangle the
// former to prevent implicitly adding unexpected
// GIM_CheckIsSameOperand predicates by the defineOperand method.
+ const CGIOperandList::OperandInfo &DstIOperand = DstI.Operands[I];
OM.setSymbolicName(getMangledRootDefName(DstIOperand.Name));
M.defineOperand(OM.getSymbolicName(), OM);
@@ -2130,16 +2153,11 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
}
auto DstMIBuilderOrError =
- createAndImportInstructionRenderer(M, InsnMatcher, Src, Dst);
+ createAndImportInstructionRenderer(M, InsnMatcher, P, Src, Dst);
if (auto Error = DstMIBuilderOrError.takeError())
return std::move(Error);
BuildMIAction &DstMIBuilder = DstMIBuilderOrError.get();
- // Render the implicit defs.
- // These are only added to the root of the result.
- if (auto Error = importImplicitDefRenderers(DstMIBuilder, P.getDstRegs()))
- return std::move(Error);
-
DstMIBuilder.chooseInsnToMutate(M);
// Constrain the registers to classes. This is normally derived from the
@@ -2176,15 +2194,15 @@ GlobalISelEmitter::buildMatchTable(MutableArrayRef<RuleMatcher> Rules,
OpcodeOrder[Opcode] = CurrentOrdering++;
}
- llvm::stable_sort(InputRules, [&OpcodeOrder](const Matcher *A,
- const Matcher *B) {
- auto *L = static_cast<const RuleMatcher *>(A);
- auto *R = static_cast<const RuleMatcher *>(B);
- return std::tuple(OpcodeOrder[L->getOpcode()],
- L->insnmatchers_front().getNumOperandMatchers()) <
- std::tuple(OpcodeOrder[R->getOpcode()],
- R->insnmatchers_front().getNumOperandMatchers());
- });
+ llvm::stable_sort(
+ InputRules, [&OpcodeOrder](const Matcher *A, const Matcher *B) {
+ auto *L = static_cast<const RuleMatcher *>(A);
+ auto *R = static_cast<const RuleMatcher *>(B);
+ return std::tuple(OpcodeOrder[L->getOpcode()],
+ L->insnmatchers_front().getNumOperandMatchers()) <
+ std::tuple(OpcodeOrder[R->getOpcode()],
+ R->insnmatchers_front().getNumOperandMatchers());
+ });
for (Matcher *Rule : InputRules)
Rule->optimize();
@@ -2299,31 +2317,6 @@ void GlobalISelEmitter::emitRunCustomAction(raw_ostream &OS) {
<< "}\n";
}
-void GlobalISelEmitter::postProcessRule(RuleMatcher &M) {
- SmallPtrSet<const Record *, 16> UsedRegs;
-
- // TODO: deal with subregs?
- for (auto &A : M.actions()) {
- auto *MI = dyn_cast<BuildMIAction>(A.get());
- if (!MI)
- continue;
-
- for (auto *Use : MI->getCGI()->ImplicitUses)
- UsedRegs.insert(Use);
- }
-
- for (auto &A : M.actions()) {
- auto *MI = dyn_cast<BuildMIAction>(A.get());
- if (!MI)
- continue;
-
- for (auto *Def : MI->getCGI()->ImplicitDefs) {
- if (!UsedRegs.contains(Def))
- MI->setDeadImplicitDef(Def);
- }
- }
-}
-
void GlobalISelEmitter::run(raw_ostream &OS) {
if (!UseCoverageFile.empty()) {
RuleCoverage = CodeGenCoverage();
@@ -2383,7 +2376,6 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
"Pattern is not covered by a test");
}
Rules.push_back(std::move(MatcherOrErr.get()));
- postProcessRule(Rules.back());
}
// Comparison function to order records by name.
More information about the llvm-commits
mailing list