[llvm] [WIP][TableGen][GISel] Learn to import patterns with optional/physreg defs (PR #120343)

Sergei Barannikov via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 17 17:27:57 PST 2024


https://github.com/s-barannikov created https://github.com/llvm/llvm-project/pull/120343

None

>From 3bdc2f28d485445657271ba6587203c0412c9b58 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <s.barannikov at module.ru>
Date: Wed, 18 Dec 2024 02:05:14 +0300
Subject: [PATCH 1/2] [TableGen][GISel] Import more "multi-level" patterns

Previously, if the destination DAG has an untyped leaf, we would import
the pattern only if that leaf is defined by the *top-level* source DAG.
This is an unnecessary restriction.

Here is an example of such a pattern:
```
def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC),
          (VMLADDUHM $vA, $vB, $vC)>;
```

Previously, it failed to import because `add` defines neither
`$vA` nor `$vB`.

This change reduces the number of skipped patterns as follows:

```
AArch64: 8695 ->  8548
AMDGPU: 11333 -> 11240
ARM:     4297 ->  4278
PowerPC: 3955 ->  3010
```

Other GISel-enabled targets are unaffected.
---
 .../GlobalISel/inst-select-ashr.s16.mir       |   14 +-
 .../AMDGPU/GlobalISel/inst-select-ctpop.mir   |   25 +-
 .../GlobalISel/inst-select-lshr.s16.mir       |   14 +-
 .../AMDGPU/GlobalISel/inst-select-shl.s16.mir |   14 +-
 llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll   |  170 +-
 llvm/test/CodeGen/AMDGPU/constrained-shift.ll |    4 -
 .../CodeGen/AMDGPU/integer-mad-patterns.ll    | 1612 +++++++----------
 llvm/utils/TableGen/GlobalISelEmitter.cpp     |   11 +-
 8 files changed, 821 insertions(+), 1043 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir
index fb7c2d4d705e75..95d2bae98df2e1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir
@@ -274,24 +274,18 @@ body: |
     ; GFX8-NEXT: {{  $}}
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
-    ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-    ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec
-    ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-    ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ASHRREV_I16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
     ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ; GFX9-LABEL: name: ashr_s16_vv_zext_to_s64
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
-    ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-    ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec
-    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ASHRREV_I16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
     ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ; GFX10-LABEL: name: ashr_s16_vv_zext_to_s64
     ; GFX10: liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir
index 779312596313a3..3a2ed71e4d2242 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir
@@ -79,9 +79,8 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY1]], 0, implicit $exec
-    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+    ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = G_CTPOP %0
@@ -104,9 +103,8 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY1]], [[V_BCNT_U32_B32_e64_]], 0, implicit $exec
-    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+    ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s32) = G_CTPOP %0
@@ -155,9 +153,8 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY1]], 0, implicit $exec
-    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+    ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr(s32) = COPY $sgpr0
     %2:vgpr(s32) = G_CTPOP %0
@@ -181,9 +178,8 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], 0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY]], 0, implicit $exec
-    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+    ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr(s32) = COPY $sgpr0
     %2:vgpr(s32) = G_CTPOP %1
@@ -207,9 +203,8 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def dead $scc
-    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[S_BCNT1_I32_B32_]], [[COPY1]], 0, implicit $exec
-    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+    ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s32) = G_CTPOP %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir
index e7ec5fcbba2473..a96b574a647848 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir
@@ -272,24 +272,18 @@ body: |
     ; GFX8-NEXT: {{  $}}
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
-    ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-    ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec
-    ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-    ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHRREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
     ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ; GFX9-LABEL: name: lshr_s16_vv_zext_to_s64
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
-    ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-    ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec
-    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHRREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
     ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ; GFX10-LABEL: name: lshr_s16_vv_zext_to_s64
     ; GFX10: liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir
index bcb6d75c18302b..b0703a642e033a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir
@@ -272,24 +272,18 @@ body: |
     ; GFX8-NEXT: {{  $}}
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
-    ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-    ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec
-    ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-    ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
     ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ; GFX9-LABEL: name: shl_s16_vv_zext_to_s64
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
-    ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-    ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec
-    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
     ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     ; GFX10-LABEL: name: shl_s16_vv_zext_to_s64
     ; GFX10: liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
index 6bb4e2d3dbe26e..ed85fb19d90517 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -204,18 +204,37 @@ define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b)
 }
 
 define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) {
-; GCN-LABEL: vector_xnor_i32_one_use:
-; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_xor_b32_e32 v0, v0, v1
-; GCN-NEXT:    v_not_b32_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GFX7-LABEL: vector_xnor_i32_one_use:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_xor_b32_e32 v0, v0, v1
+; GFX7-NEXT:    v_not_b32_e32 v0, v0
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: vector_xnor_i32_one_use:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_xor_b32_e32 v0, v0, v1
+; GFX8-NEXT:    v_not_b32_e32 v0, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: vector_xnor_i32_one_use:
+; GFX900:       ; %bb.0: ; %entry
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_xor_b32_e32 v0, v0, v1
+; GFX900-NEXT:    v_not_b32_e32 v0, v0
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: vector_xnor_i32_one_use:
+; GFX906:       ; %bb.0: ; %entry
+; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT:    v_xnor_b32_e32 v0, v0, v1
+; GFX906-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: vector_xnor_i32_one_use:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v1
-; GFX10-NEXT:    v_not_b32_e32 v0, v0
+; GFX10-NEXT:    v_xnor_b32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %xor = xor i32 %a, %b
@@ -224,22 +243,45 @@ entry:
 }
 
 define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) {
-; GCN-LABEL: vector_xnor_i64_one_use:
-; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GCN-NEXT:    v_xor_b32_e32 v1, v1, v3
-; GCN-NEXT:    v_not_b32_e32 v0, v0
-; GCN-NEXT:    v_not_b32_e32 v1, v1
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GFX7-LABEL: vector_xnor_i64_one_use:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX7-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX7-NEXT:    v_not_b32_e32 v0, v0
+; GFX7-NEXT:    v_not_b32_e32 v1, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: vector_xnor_i64_one_use:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX8-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX8-NEXT:    v_not_b32_e32 v0, v0
+; GFX8-NEXT:    v_not_b32_e32 v1, v1
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: vector_xnor_i64_one_use:
+; GFX900:       ; %bb.0: ; %entry
+; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX900-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX900-NEXT:    v_not_b32_e32 v0, v0
+; GFX900-NEXT:    v_not_b32_e32 v1, v1
+; GFX900-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: vector_xnor_i64_one_use:
+; GFX906:       ; %bb.0: ; %entry
+; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT:    v_xnor_b32_e32 v0, v0, v2
+; GFX906-NEXT:    v_xnor_b32_e32 v1, v1, v3
+; GFX906-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: vector_xnor_i64_one_use:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v3
-; GFX10-NEXT:    v_not_b32_e32 v0, v0
-; GFX10-NEXT:    v_not_b32_e32 v1, v1
+; GFX10-NEXT:    v_xnor_b32_e32 v0, v0, v2
+; GFX10-NEXT:    v_xnor_b32_e32 v1, v1, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %xor = xor i64 %a, %b
@@ -248,16 +290,32 @@ entry:
 }
 
 define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
-; GCN-LABEL: xnor_s_v_i32_one_use:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GCN-NEXT:    v_not_b32_e32 v0, v0
-; GCN-NEXT:    ; return to shader part epilog
+; GFX7-LABEL: xnor_s_v_i32_one_use:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX7-NEXT:    v_not_b32_e32 v0, v0
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: xnor_s_v_i32_one_use:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX8-NEXT:    v_not_b32_e32 v0, v0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX900-LABEL: xnor_s_v_i32_one_use:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX900-NEXT:    v_not_b32_e32 v0, v0
+; GFX900-NEXT:    ; return to shader part epilog
+;
+; GFX906-LABEL: xnor_s_v_i32_one_use:
+; GFX906:       ; %bb.0:
+; GFX906-NEXT:    v_xnor_b32_e32 v0, s0, v0
+; GFX906-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-LABEL: xnor_s_v_i32_one_use:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT:    v_not_b32_e32 v0, v0
+; GFX10-NEXT:    v_xnor_b32_e32 v0, s0, v0
 ; GFX10-NEXT:    ; return to shader part epilog
   %xor = xor i32 %s, %v
   %d = xor i32 %xor, -1
@@ -266,16 +324,32 @@ define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
 }
 
 define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) {
-; GCN-LABEL: xnor_v_s_i32_one_use:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GCN-NEXT:    v_not_b32_e32 v0, v0
-; GCN-NEXT:    ; return to shader part epilog
+; GFX7-LABEL: xnor_v_s_i32_one_use:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX7-NEXT:    v_not_b32_e32 v0, v0
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: xnor_v_s_i32_one_use:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX8-NEXT:    v_not_b32_e32 v0, v0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX900-LABEL: xnor_v_s_i32_one_use:
+; GFX900:       ; %bb.0:
+; GFX900-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX900-NEXT:    v_not_b32_e32 v0, v0
+; GFX900-NEXT:    ; return to shader part epilog
+;
+; GFX906-LABEL: xnor_v_s_i32_one_use:
+; GFX906:       ; %bb.0:
+; GFX906-NEXT:    v_xnor_b32_e64 v0, v0, s0
+; GFX906-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-LABEL: xnor_v_s_i32_one_use:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT:    v_not_b32_e32 v0, v0
+; GFX10-NEXT:    v_xnor_b32_e64 v0, v0, s0
 ; GFX10-NEXT:    ; return to shader part epilog
   %xor = xor i32 %v, %s
   %d = xor i32 %xor, -1
@@ -314,19 +388,15 @@ define amdgpu_ps <2 x float> @xnor_i64_s_v_one_use(i64 inreg %a, i64 %b64) {
 ; GFX906-LABEL: xnor_i64_s_v_one_use:
 ; GFX906:       ; %bb.0: ; %entry
 ; GFX906-NEXT:    v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX906-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX906-NEXT:    v_xor_b32_e32 v1, s1, v1
-; GFX906-NEXT:    v_not_b32_e32 v0, v0
-; GFX906-NEXT:    v_not_b32_e32 v1, v1
+; GFX906-NEXT:    v_xnor_b32_e32 v0, s0, v0
+; GFX906-NEXT:    v_xnor_b32_e32 v1, s1, v1
 ; GFX906-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-LABEL: xnor_i64_s_v_one_use:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT:    v_xor_b32_e32 v1, s1, v1
-; GFX10-NEXT:    v_not_b32_e32 v0, v0
-; GFX10-NEXT:    v_not_b32_e32 v1, v1
+; GFX10-NEXT:    v_xnor_b32_e32 v0, s0, v0
+; GFX10-NEXT:    v_xnor_b32_e32 v1, s1, v1
 ; GFX10-NEXT:    ; return to shader part epilog
 entry:
   %b = shl i64 %b64, 29
@@ -367,19 +437,15 @@ define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) {
 ; GFX906-LABEL: xnor_i64_v_s_one_use:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX906-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX906-NEXT:    v_xor_b32_e32 v1, s1, v1
-; GFX906-NEXT:    v_not_b32_e32 v0, v0
-; GFX906-NEXT:    v_not_b32_e32 v1, v1
+; GFX906-NEXT:    v_xnor_b32_e64 v0, v0, s0
+; GFX906-NEXT:    v_xnor_b32_e64 v1, v1, s1
 ; GFX906-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-LABEL: xnor_i64_v_s_one_use:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT:    v_xor_b32_e32 v1, s1, v1
-; GFX10-NEXT:    v_not_b32_e32 v0, v0
-; GFX10-NEXT:    v_not_b32_e32 v1, v1
+; GFX10-NEXT:    v_xnor_b32_e64 v0, v0, s0
+; GFX10-NEXT:    v_xnor_b32_e64 v1, v1, s1
 ; GFX10-NEXT:    ; return to shader part epilog
   %b = shl i64 %b64, 29
   %xor = xor i64 %b, %a
@@ -419,7 +485,7 @@ define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) {
 ; GFX10-LABEL: vector_xor_na_b_i32_one_use:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_xor3_b32 v0, v0, -1, v1
+; GFX10-NEXT:    v_xnor_b32_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %na = xor i32 %a, -1
@@ -458,7 +524,7 @@ define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) {
 ; GFX10-LABEL: vector_xor_a_nb_i32_one_use:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_xor3_b32 v0, v1, -1, v0
+; GFX10-NEXT:    v_xnor_b32_e32 v0, v1, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %nb = xor i32 %b, -1
diff --git a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
index 1b35a89ad7f935..4011c21af69046 100644
--- a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
+++ b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
@@ -139,10 +139,6 @@ define <4 x i32> @csh_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; GISEL-LABEL: csh_v4i32:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    v_and_b32_e32 v4, 31, v4
-; GISEL-NEXT:    v_and_b32_e32 v5, 31, v5
-; GISEL-NEXT:    v_and_b32_e32 v6, 31, v6
-; GISEL-NEXT:    v_and_b32_e32 v7, 31, v7
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v8, v4, v0
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v9, v5, v1
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v10, v6, v2
diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
index 9f093cc7b5abf2..26a4ea9d8a4b6e 100644
--- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
@@ -230,49 +230,27 @@ entry:
 }
 
 define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
-; GFX67-SDAG-LABEL: clpeak_imad_pat_i16:
-; GFX67-SDAG:       ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v0, v2
-; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v4
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_imad_pat_i16:
-; GFX67-GISEL:       ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_imad_pat_i16:
+; GFX67:       ; %bb.0: ; %entry
+; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v1
+; GFX67-NEXT:    v_mul_u32_u24_e32 v3, v0, v2
+; GFX67-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v4
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX67-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_imad_pat_i16:
 ; GFX8-SDAG:       ; %bb.0: ; %entry
@@ -337,11 +315,11 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX10-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -363,13 +341,13 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX11-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
@@ -400,13 +378,13 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
 ; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX1200-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1200-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
@@ -470,42 +448,40 @@ define <2 x i16> @clpeak_imad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v4, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v5, v5, v3, 1
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v4, v4, v2, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v3, v3, v4
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v6
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v7
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
 ; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16:
@@ -682,46 +658,43 @@ define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v6, v6, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v7, v4
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v6, v3, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v2
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v8, v8, v5
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v7, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v7, v4, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v3
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v8, v5, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v9, v0, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v6
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v4
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v6, v6, v3, 1
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v10, v1, v4
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v7
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v7, v7, v4, 1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v3, 1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v4, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v9
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v6
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v11, v2, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v8, v8, v5, 1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v2, v5, 1
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v4
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v10
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v7
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v4, v5
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v11
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v8
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v5
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v8
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v6
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v3
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v3, v0
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v7
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v5
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v8
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v3, v2
 ; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v3i16:
@@ -1063,19 +1036,15 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v8
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v10, v10, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v10, v5, v1
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v11, 16, v9
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v8, v8, v4
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v9
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v6
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v7
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v9, v9, v6
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v11, v11, v7
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v8, v4, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, v11, v3
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v9, v6, v2
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v11, v7, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
@@ -1085,60 +1054,60 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v5
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v10
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v13, v2, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v10, v10, v5, 1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v2, v5, 1
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v4
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v12, v0, v4
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v8, v8, v4, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v10, 0xffff, v10
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v4, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v8
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v6
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v7
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v9
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v11
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v6
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v7
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v2
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v5, v5, v6
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v7
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v3
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v6
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v6, v6, v7
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v8
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v9
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v8, 16, v8
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v7, v7, v8
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v15, v3, v7
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v11, v11, v7, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v8
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v3, v7, 1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v14, v1, v6
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v9, v9, v6, 1
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v8, v8, v10
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v10, 0xffff, v11
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v6, 1
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v3
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v9
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v12
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v8
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v9, v9, v10
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v1, v2
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v8
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v4, v5
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v13
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v5, v1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v14
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v9
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v9
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v15
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v6, v3
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v7, 16, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v4, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v5
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v8
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v1, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v1, v9
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v6
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v7
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v6
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v2, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v7
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v6
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v5
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v6
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v4, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v7
 ; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v4i16:
@@ -1403,47 +1372,26 @@ entry:
 }
 
 define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
-; GFX67-SDAG-LABEL: clpeak_umad_pat_i16:
-; GFX67-SDAG:       ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v2, v0, v1
-; GFX67-SDAG-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v3, v2
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v1, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v4
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v3, v2, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_umad_pat_i16:
-; GFX67-GISEL:       ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_umad_pat_i16:
+; GFX67:       ; %bb.0: ; %entry
+; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_mul_u32_u24_e32 v2, v0, v1
+; GFX67-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
+; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v3, v2
+; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v1, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v4
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT:    v_mad_u32_u24 v1, v3, v2, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_umad_pat_i16:
 ; GFX8-SDAG:       ; %bb.0: ; %entry
@@ -1504,11 +1452,11 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -1530,13 +1478,13 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX11-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -1567,13 +1515,13 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
 ; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX1200-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1200-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -1637,42 +1585,40 @@ define <2 x i16> @clpeak_umad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v4, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v5, v5, v3, 1
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v4, v4, v2, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v3, v3, v4
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v6
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v7
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
 ; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16:
@@ -1849,46 +1795,43 @@ define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v6, v6, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v7, v4
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v6, v3, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v2
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v8, v8, v5
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v7, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v7, v4, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v3
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v8, v5, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v9, v0, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v6
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v4
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v6, v6, v3, 1
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v10, v1, v4
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v7
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v7, v7, v4, 1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v3, 1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v4, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v9
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v6
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v11, v2, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v8, v8, v5, 1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v2, v5, 1
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v4
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v10
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v7
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v4, v5
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v11
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v8
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v5
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v8
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v6
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v3
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v3, v0
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v7
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v5
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v8
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v3, v2
 ; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_umad_pat_v3i16:
@@ -2230,19 +2173,15 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v8
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v10, v10, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v10, v5, v1
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v11, 16, v9
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v8, v8, v4
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v9
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v6
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v7
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v9, v9, v6
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v11, v11, v7
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v8, v4, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, v11, v3
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v9, v6, v2
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v11, v7, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
@@ -2252,60 +2191,60 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v5
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v10
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v13, v2, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v10, v10, v5, 1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v2, v5, 1
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v4
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v12, v0, v4
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v8, v8, v4, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v10, 0xffff, v10
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v4, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v8
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v6
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v7
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v9
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v11
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v6
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v7
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v2
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v5, v5, v6
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v7
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v3
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v6
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v6, v6, v7
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v8
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v9
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v8, 16, v8
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v7, v7, v8
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v15, v3, v7
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v11, v11, v7, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v8
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v3, v7, 1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v14, v1, v6
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v9, v9, v6, 1
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v8, v8, v10
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v10, 0xffff, v11
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v6, 1
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v3
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v9
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v12
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v8
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v9, v9, v10
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v1, v2
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v8
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v4, v5
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v13
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v5, v1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v14
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v9
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v9
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v15
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v6, v3
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v7, 16, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v4, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v5
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v8
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v1, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v1, v9
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v6
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v7
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v6
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v2, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v7
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v6
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v5
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v6
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v4, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v7
 ; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_umad_pat_v4i16:
@@ -4282,49 +4221,27 @@ entry:
 }
 
 define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
-; GFX67-SDAG-LABEL: clpeak_imad_pat_i8:
-; GFX67-SDAG:       ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xff, v1
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v0, v2
-; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xff, v3
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xff, v4
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_imad_pat_i8:
-; GFX67-GISEL:       ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 8
-; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_imad_pat_i8:
+; GFX67:       ; %bb.0: ; %entry
+; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xff, v0
+; GFX67-NEXT:    v_and_b32_e32 v2, 0xff, v1
+; GFX67-NEXT:    v_mul_u32_u24_e32 v3, v0, v2
+; GFX67-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xff, v1
+; GFX67-NEXT:    v_and_b32_e32 v3, 0xff, v3
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xff, v0
+; GFX67-NEXT:    v_and_b32_e32 v2, 0xff, v4
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xff, v0
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xff, v1
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; GFX67-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_imad_pat_i8:
 ; GFX8-SDAG:       ; %bb.0: ; %entry
@@ -4389,11 +4306,11 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX10-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 8
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -4415,13 +4332,13 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX11-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 8
@@ -4452,13 +4369,13 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
 ; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX1200-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1200-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 8
@@ -4524,32 +4441,30 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
 ; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xff, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v4, v2
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xff, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v4, v4, v2, 1
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v5, v5, v3, 1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v6
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v4
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v7
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xff, v5
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v4
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v2, v1
 ; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i8:
@@ -4655,20 +4570,18 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v4, v0, v2
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v5, v1, v3
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v4, v0
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v5, v1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v2
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v3
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, v4, 1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v3, v5, 1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v4, v0, 1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v5, v1, 1
+; GFX10-GISEL-NEXT:    v_mad_u16 v4, v0, v2, v0
+; GFX10-GISEL-NEXT:    v_mad_u16 v5, v1, v3, v1
+; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v2, 1
+; GFX10-GISEL-NEXT:    v_mad_u16 v1, v1, v3, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v6, v4, v2
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v7, v5, v3
+; GFX10-GISEL-NEXT:    v_mad_u16 v2, v4, v2, 1
+; GFX10-GISEL-NEXT:    v_mad_u16 v3, v5, v3, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v6, v0
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v7, v1
 ; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v2
 ; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v3
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v4
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v5
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i8:
@@ -4704,25 +4617,21 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
 ; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v4, v0, v2
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v5, v1, v3
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v4, v0
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v5, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v2
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v3
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v2, v4, 1
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v3, v5, 1
+; GFX11-GISEL-NEXT:    v_mad_u16 v4, v0, v2, v0
+; GFX11-GISEL-NEXT:    v_mad_u16 v5, v1, v3, v1
+; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v2, 1
+; GFX11-GISEL-NEXT:    v_mad_u16 v1, v1, v3, 1
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v4, v0, 1
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v5, v1, 1
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v6, v4, v2
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v7, v5, v3
+; GFX11-GISEL-NEXT:    v_mad_u16 v2, v4, v2, 1
+; GFX11-GISEL-NEXT:    v_mad_u16 v3, v5, v3, 1
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v6, v0
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v7, v1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v2
 ; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v3
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v4
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v5
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1200-SDAG-LABEL: clpeak_imad_pat_v2i8:
@@ -4766,25 +4675,21 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
 ; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX1200-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
 ; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v4, v0, v2
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v5, v1, v3
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v4, v0
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v1, v5, v1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v2
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v3
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v2, v4, 1
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v3, v5, 1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v4, v0, v2, v0
+; GFX1200-GISEL-NEXT:    v_mad_u16 v5, v1, v3, v1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v2, 1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v1, v1, v3, 1
 ; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v4, v0, 1
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v5, v1, 1
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v6, v4, v2
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v7, v5, v3
+; GFX1200-GISEL-NEXT:    v_mad_u16 v2, v4, v2, 1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v3, v5, v3, 1
 ; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v6, v0
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v7, v1
+; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v2
 ; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v3
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v4
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v5
 ; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %y18 = add <2 x i8> %x, <i8 1, i8 1>
@@ -7600,81 +7505,43 @@ entry:
 }
 
 define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
-; GFX67-SDAG-LABEL: clpeak_imad_pat_i16_x2:
-; GFX67-SDAG:       ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v0, v2
-; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v4
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v0, v2
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v4
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v4
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v0, v2
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v4
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v4
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_imad_pat_i16_x2:
-; GFX67-GISEL:       ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_imad_pat_i16_x2:
+; GFX67:       ; %bb.0: ; %entry
+; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v1
+; GFX67-NEXT:    v_mul_u32_u24_e32 v3, v0, v2
+; GFX67-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v4
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v0, v2
+; GFX67-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v4
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v4
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v0, v2
+; GFX67-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v4
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v4
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX67-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_imad_pat_i16_x2:
 ; GFX8-SDAG:       ; %bb.0: ; %entry
@@ -7767,19 +7634,19 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
+; GFX10-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
+; GFX10-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v0
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v0
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX10-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -7807,23 +7674,23 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX11-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v0
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v0
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
@@ -7860,23 +7727,23 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
 ; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX1200-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v0
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v0
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1200-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
@@ -7902,79 +7769,42 @@ entry:
 }
 
 define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
-; GFX67-SDAG-LABEL: clpeak_umad_pat_i16_x2:
-; GFX67-SDAG:       ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v2, v0, v1
-; GFX67-SDAG-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v3, v2
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v1, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v4
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v3, v2, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v4
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v2, v3
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v1, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v4
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v2, v3, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v4
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v2, v3
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v1, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v4
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v2, v3, 1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_umad_pat_i16_x2:
-; GFX67-GISEL:       ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_umad_pat_i16_x2:
+; GFX67:       ; %bb.0: ; %entry
+; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_mul_u32_u24_e32 v2, v0, v1
+; GFX67-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
+; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v3, v2
+; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v1, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v4
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
+; GFX67-NEXT:    v_mad_u32_u24 v2, v3, v2, 1
+; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v4
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v2, v3
+; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v1, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v4
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
+; GFX67-NEXT:    v_mad_u32_u24 v2, v2, v3, 1
+; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v4
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v2, v3
+; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v1, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v4
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT:    v_mad_u32_u24 v1, v2, v3, 1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_umad_pat_i16_x2:
 ; GFX8-SDAG:       ; %bb.0: ; %entry
@@ -8063,19 +7893,19 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
+; GFX10-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
+; GFX10-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v0
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v0
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -8103,23 +7933,23 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX11-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v0
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v0
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -8156,23 +7986,23 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
 ; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
 ; GFX1200-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v0
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v0
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
 ; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
 ; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
 ; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1200-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
@@ -8268,10 +8098,8 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v4, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
@@ -8279,9 +8107,9 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v5, v3, 1
 ; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v4
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v4, v2, 1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v2
 ; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
@@ -8290,64 +8118,60 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v1, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v0, v4
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v1, v5, v3
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v0, v4, v2
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v5, 1
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v4, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
 ; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v1, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v0, v4
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v3, v3, v4
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v3, v5, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v2, v4, v0
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v5, v3, v5, 1
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v4, v2, v4, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v6
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v7
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
 ; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
@@ -8591,10 +8415,8 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v4, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
@@ -8602,9 +8424,9 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v5, v3, 1
 ; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v4
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v4, v2, 1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v2
 ; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
@@ -8613,64 +8435,60 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v1, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v0, v4
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v1, v5, v3
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v0, v4, v2
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v5, 1
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v4, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
 ; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
 ; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v1, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v0, v4
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v1
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v3, v3, v4
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v3, v5, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v2, v4, v0
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v5, v3, v5, 1
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v4, v2, v4, 1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v6
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v7
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
 ; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
@@ -8908,24 +8726,14 @@ entry:
 }
 
 define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
-; GFX67-SDAG-LABEL: multi_use_mul_mad_i16_var:
-; GFX67-SDAG:       ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v4, v1, v2
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v4, v1, v3
-; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: multi_use_mul_mad_i16_var:
-; GFX67-GISEL:       ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v0, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v2
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
-; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX67-LABEL: multi_use_mul_mad_i16_var:
+; GFX67:       ; %bb.0: ; %entry
+; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT:    v_and_b32_e32 v4, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT:    v_mad_u32_u24 v0, v4, v1, v2
+; GFX67-NEXT:    v_mad_u32_u24 v1, v4, v1, v3
+; GFX67-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: multi_use_mul_mad_i16_var:
 ; GFX8-SDAG:       ; %bb.0: ; %entry
@@ -8973,10 +8781,9 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
 ; GFX10-GISEL-LABEL: multi_use_mul_mad_i16_var:
 ; GFX10-GISEL:       ; %bb.0: ; %entry
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v0, v2
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, v3
-; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-GISEL-NEXT:    v_mad_u16 v2, v0, v1, v2
+; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, v3
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v2
 ; GFX10-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -8992,12 +8799,10 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
 ; GFX11-GISEL-LABEL: multi_use_mul_mad_i16_var:
 ; GFX11-GISEL:       ; %bb.0: ; %entry
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v0, v2
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, v3
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_mad_u16 v2, v0, v1, v2
+; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v2
 ; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -9021,12 +8826,10 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
 ; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
 ; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v1, v0, v2
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, v3
-; GFX1200-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-NEXT:    v_mad_u16 v2, v0, v1, v2
+; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, v3
+; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v2
 ; GFX1200-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
 ; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
 entry:
@@ -9108,29 +8911,17 @@ entry:
 }
 
 define i16 @other_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z, ptr addrspace(3) %ptr) {
-; GFX67-SDAG-LABEL: other_use_mul_mad_i16_var:
-; GFX67-SDAG:       ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
-; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v1, v2
-; GFX67-SDAG-NEXT:    s_mov_b32 m0, -1
-; GFX67-SDAG-NEXT:    ds_write_b16 v3, v4
-; GFX67-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: other_use_mul_mad_i16_var:
-; GFX67-GISEL:       ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v0, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v2
-; GFX67-GISEL-NEXT:    s_mov_b32 m0, -1
-; GFX67-GISEL-NEXT:    ds_write_b16 v3, v1
-; GFX67-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX67-LABEL: other_use_mul_mad_i16_var:
+; GFX67:       ; %bb.0: ; %entry
+; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
+; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v1, v2
+; GFX67-NEXT:    s_mov_b32 m0, -1
+; GFX67-NEXT:    ds_write_b16 v3, v4
+; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX67-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: other_use_mul_mad_i16_var:
 ; GFX8:       ; %bb.0: ; %entry
@@ -9151,69 +8942,36 @@ define i16 @other_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z, ptr addrspace(3) %
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-SDAG-LABEL: other_use_mul_mad_i16_var:
-; GFX10-SDAG:       ; %bb.0: ; %entry
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_mul_lo_u16 v4, v0, v1
-; GFX10-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v2
-; GFX10-SDAG-NEXT:    ds_write_b16 v3, v4
-; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: other_use_mul_mad_i16_var:
-; GFX10-GISEL:       ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v0, v1
-; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v1, v2
-; GFX10-GISEL-NEXT:    ds_write_b16 v3, v1
-; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: other_use_mul_mad_i16_var:
-; GFX11-SDAG:       ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_mul_lo_u16 v4, v0, v1
-; GFX11-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v2
-; GFX11-SDAG-NEXT:    ds_store_b16 v3, v4
-; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: other_use_mul_mad_i16_var:
-; GFX11-GISEL:       ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v0, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v1, v2
-; GFX11-GISEL-NEXT:    ds_store_b16 v3, v1
-; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: other_use_mul_mad_i16_var:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_mul_lo_u16 v4, v0, v1
+; GFX10-NEXT:    v_mad_u16 v0, v0, v1, v2
+; GFX10-NEXT:    ds_write_b16 v3, v4
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1200-SDAG-LABEL: other_use_mul_mad_i16_var:
-; GFX1200-SDAG:       ; %bb.0: ; %entry
-; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
-; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
-; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
-; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
-; GFX1200-SDAG-NEXT:    v_mul_lo_u16 v4, v0, v1
-; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v2
-; GFX1200-SDAG-NEXT:    ds_store_b16 v3, v4
-; GFX1200-SDAG-NEXT:    s_wait_dscnt 0x0
-; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: other_use_mul_mad_i16_var:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_mul_lo_u16 v4, v0, v1
+; GFX11-NEXT:    v_mad_u16 v0, v0, v1, v2
+; GFX11-NEXT:    ds_store_b16 v3, v4
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1200-GISEL-LABEL: other_use_mul_mad_i16_var:
-; GFX1200-GISEL:       ; %bb.0: ; %entry
-; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
-; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
-; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
-; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v0, v1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v1, v2
-; GFX1200-GISEL-NEXT:    ds_store_b16 v3, v1
-; GFX1200-GISEL-NEXT:    s_wait_dscnt 0x0
-; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX1200-LABEL: other_use_mul_mad_i16_var:
+; GFX1200:       ; %bb.0: ; %entry
+; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1200-NEXT:    s_wait_expcnt 0x0
+; GFX1200-NEXT:    s_wait_samplecnt 0x0
+; GFX1200-NEXT:    s_wait_bvhcnt 0x0
+; GFX1200-NEXT:    s_wait_kmcnt 0x0
+; GFX1200-NEXT:    v_mul_lo_u16 v4, v0, v1
+; GFX1200-NEXT:    v_mad_u16 v0, v0, v1, v2
+; GFX1200-NEXT:    ds_store_b16 v3, v4
+; GFX1200-NEXT:    s_wait_dscnt 0x0
+; GFX1200-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %mul = mul i16 %x, %y
   %add0 = add i16 %mul, %z
@@ -9246,16 +9004,14 @@ define <4 x i16> @multi_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i
 ; GFX67-GISEL-LABEL: multi_use_mul_mad_v2i16_var:
 ; GFX67-GISEL:       ; %bb.0: ; %entry
 ; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v0, v2
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v0, v1
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v4
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v3, v5
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v8, v2, v4
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v9, v3, v5
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v8, v2, v6
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v9, v3, v7
 ; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-SDAG-LABEL: multi_use_mul_mad_v2i16_var:
@@ -9366,20 +9122,20 @@ define <2 x i16> @other_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i
 ; GFX67-GISEL-LABEL: other_use_mul_mad_v2i16_var:
 ; GFX67-GISEL:       ; %bb.0: ; %entry
 ; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v3
-; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v1
-; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v8, v1, v3
+; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v0, v2
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v8
+; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v7
+; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v8, 16, v8
+; GFX67-GISEL-NEXT:    v_or_b32_e32 v7, v7, v8
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v2, v4
+; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v3, v5
 ; GFX67-GISEL-NEXT:    s_mov_b32 m0, -1
-; GFX67-GISEL-NEXT:    ds_write_b32 v6, v2
+; GFX67-GISEL-NEXT:    ds_write_b32 v6, v7
 ; GFX67-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -9532,29 +9288,15 @@ define i64 @mul_u24_add64(i32 %x, i32 %y, i64 %z) {
 ; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
 ; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1200-SDAG-LABEL: mul_u24_add64:
-; GFX1200-SDAG:       ; %bb.0:
-; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
-; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
-; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
-; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
-; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3]
-; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX1200-GISEL-LABEL: mul_u24_add64:
-; GFX1200-GISEL:       ; %bb.0:
-; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
-; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
-; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
-; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
-; GFX1200-GISEL-NEXT:    v_mul_hi_u32_u24_e32 v1, v0, v1
-; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v4, v2
-; GFX1200-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX1200-LABEL: mul_u24_add64:
+; GFX1200:       ; %bb.0:
+; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1200-NEXT:    s_wait_expcnt 0x0
+; GFX1200-NEXT:    s_wait_samplecnt 0x0
+; GFX1200-NEXT:    s_wait_bvhcnt 0x0
+; GFX1200-NEXT:    s_wait_kmcnt 0x0
+; GFX1200-NEXT:    v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3]
+; GFX1200-NEXT:    s_setpc_b64 s[30:31]
   %mul = call i64 @llvm.amdgcn.mul.u24.i64(i32 %x, i32 %y)
   %add = add i64 %mul, %z
   ret i64 %add
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 83599e789e10b9..84f23985b64213 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -1350,13 +1350,10 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
 
   // Handle the case where the MVT/register class is omitted in the dest pattern
   // but MVT exists in the source pattern.
-  if (isa<UnsetInit>(DstChild.getLeafValue())) {
-    for (const TreePatternNode &SrcChild : Src.children()) {
-      if (SrcChild.getName() == DstChild.getName()) {
-        DstMIBuilder.addRenderer<CopyRenderer>(SrcChild.getName());
-        return InsertPt;
-      }
-    }
+  if (isa<UnsetInit>(DstChild.getLeafValue()) &&
+      Rule.hasOperand(DstChild.getName())) {
+    DstMIBuilder.addRenderer<CopyRenderer>(DstChild.getName());
+    return InsertPt;
   }
   return failedImport("Dst pattern child is an unsupported kind");
 }

>From 4789a60a3be0d9417522d80564f28bfd0fd882da Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Tue, 17 Dec 2024 19:39:15 +0300
Subject: [PATCH 2/2] [TableGen][GISel] Learn to import patterns with
 optional/physreg defs

---
 .../Target/GlobalISel/SelectionDAGCompat.td   |   2 +
 .../test/CodeGen/X86/GlobalISel/mul-scalar.ll |   9 +-
 .../select-intrinsic-x86-flags-read-u32.mir   |   2 +-
 .../Common/GlobalISelEmitterCommon.td         |   3 +-
 .../GlobalISelEmitter-implicit-defs.td        |  62 +++-
 .../GlobalISelEmitter-nested-subregs.td       |   2 +-
 .../TableGen/GlobalISelEmitterRegSequence.td  |   2 +-
 llvm/test/TableGen/GlobalISelEmitterSubreg.td |   8 +-
 .../TableGen/Common/CodeGenRegisters.cpp      |   2 +-
 llvm/utils/TableGen/Common/CodeGenRegisters.h |   2 +-
 .../GlobalISel/GlobalISelMatchTable.cpp       |   3 +-
 .../Common/GlobalISel/GlobalISelMatchTable.h  |   6 +-
 llvm/utils/TableGen/GlobalISelEmitter.cpp     | 300 +++++++++---------
 13 files changed, 225 insertions(+), 178 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 2148f5be4c41aa..c8c0eeb57099a2 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -70,6 +70,8 @@ def : GINodeEquiv<G_SDIV, sdiv>;
 def : GINodeEquiv<G_UDIV, udiv>;
 def : GINodeEquiv<G_SREM, srem>;
 def : GINodeEquiv<G_UREM, urem>;
+def : GINodeEquiv<G_SDIVREM, sdivrem>;
+def : GINodeEquiv<G_UDIVREM, udivrem>;
 def : GINodeEquiv<G_AND, and>;
 def : GINodeEquiv<G_OR, or>;
 def : GINodeEquiv<G_XOR, xor>;
diff --git a/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll
index f401f45a06f6a7..3196668c70d8ec 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll
@@ -5,16 +5,17 @@
 define i8 @test_mul_i8(i8 %arg1, i8 %arg2) nounwind {
 ; X64-LABEL: test_mul_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movsbl %dil, %eax
-; X64-NEXT:    imulb %sil
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    mulb %sil
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test_mul_i8:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    cbtw
-; X86-NEXT:    imulb %cl
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    mulb %cl
 ; X86-NEXT:    retl
   %ret = mul i8 %arg1, %arg2
   ret i8 %ret
diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-intrinsic-x86-flags-read-u32.mir b/llvm/test/CodeGen/X86/GlobalISel/select-intrinsic-x86-flags-read-u32.mir
index 332ec2240c5b60..3d1857a274b4b2 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/select-intrinsic-x86-flags-read-u32.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/select-intrinsic-x86-flags-read-u32.mir
@@ -9,7 +9,7 @@
   define void @read_flags() { ret void }
   ; CHECK-LABEL: name: read_flags
   ; CHECK: bb.0:
-  ; CHECK:   [[RDFLAGS32_:%[0-9]+]]:gr32 = RDFLAGS32 implicit-def $esp, implicit $esp
+  ; CHECK:   [[RDFLAGS32_:%[0-9]+]]:gr32 = RDFLAGS32 implicit-def dead $esp, implicit $esp
   ; CHECK:   $eax = COPY [[RDFLAGS32_]]
 ...
 
diff --git a/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td b/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td
index 8f11fee3751844..cfcaf3c76bbf8a 100644
--- a/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td
+++ b/llvm/test/TableGen/Common/GlobalISelEmitterCommon.td
@@ -7,7 +7,8 @@ class MyTargetGenericInstruction : GenericInstruction {
 }
 
 def R0 : Register<"r0"> { let Namespace = "MyTarget"; }
-def GPR32 : RegisterClass<"MyTarget", [i32], 32, (add R0)>;
+def R1 : Register<"r0"> { let Namespace = "MyTarget"; }
+def GPR32 : RegisterClass<"MyTarget", [i32], 32, (add R0, R1)>;
 def GPR32Op : RegisterOperand<GPR32>;
 def F0 : Register<"f0"> { let Namespace = "MyTarget"; }
 def FPR32 : RegisterClass<"MyTarget", [f32], 32, (add F0)>;
diff --git a/llvm/test/TableGen/GlobalISelEmitter-implicit-defs.td b/llvm/test/TableGen/GlobalISelEmitter-implicit-defs.td
index 79af1a336f2890..ebf290a27b13ed 100644
--- a/llvm/test/TableGen/GlobalISelEmitter-implicit-defs.td
+++ b/llvm/test/TableGen/GlobalISelEmitter-implicit-defs.td
@@ -1,12 +1,60 @@
-// RUN: llvm-tblgen -gen-global-isel -warn-on-skipped-patterns -I %p/../../include -I %p/Common %s -o /dev/null 2>&1 < %s | FileCheck %s --implicit-check-not="Skipped pattern"
+// RUN: llvm-tblgen -gen-global-isel -I %p/../../include -I %p/Common %s | FileCheck %s
 
 include "llvm/Target/Target.td"
 include "GlobalISelEmitterCommon.td"
 
-// CHECK: Skipped pattern: Pattern defines a physical register
-let Uses = [B0], Defs = [B0] in
-def tst1 : I<(outs), (ins), [(set B0, (add B0, 1))]>;
+let Defs = [R0, R1] in
+def tst1 : I<(outs), (ins), [(set R0, (get_fpenv))]>;
 
-// CHECK: Skipped pattern: Src pattern result has 1 def(s) without the HasNoUse predicate set to true but Dst MI has no def
-let Uses = [B0] in
-def tst2 : I<(outs), (ins), [(set B0, (add B0, 1))]>;
+let Defs = [R0, R1] in
+def tst2 : I<(outs GPR32:$rd), (ins GPR32:$rs1, GPR32:$rs2),
+             [(set GPR32:$rd, R0, (udivrem i32:$rs1, i32:$rs2))]>;
+
+def : Pat<(sdiv i32:$rs1, i32:$rs2), (tst2 $rs1, $rs2)>;
+def : Pat<(sdivrem i32:$rs1, i32:$rs2), (tst2 $rs1, $rs2)>;
+
+// CHECK-LABEL: // (sdiv:{ *:[i32] } i32:{ *:[i32] }:$rs1, i32:{ *:[i32] }:$rs2)  =>  (tst2:{ *:[i32] }:{ *:[i32] } ?:{ *:[i32] }:$rs1, ?:{ *:[i32] }:$rs2)
+// CHECK-NEXT: GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/GIMT_Encode2(MyTarget::tst2),
+// CHECK-NEXT: GIR_AddImplicitDef, /*InsnID*/0, GIMT_Encode2(MyTarget::R0), GIMT_Encode2(RegState::Dead),
+// CHECK-NEXT: GIR_AddImplicitDef, /*InsnID*/0, GIMT_Encode2(MyTarget::R1), GIMT_Encode2(RegState::Dead),
+// CHECK-NEXT: GIR_RootConstrainSelectedInstOperands,
+// CHECK-NEXT: // GIR_Coverage, 2,
+
+// CHECK-LABEL: // (sdivrem:{ *:[i32] }:{ *:[i32] } i32:{ *:[i32] }:$rs1, i32:{ *:[i32] }:$rs2)  =>  (tst2:{ *:[i32] }:{ *:[i32] } ?:{ *:[i32] }:$rs1, ?:{ *:[i32] }:$rs2)
+// CHECK-NEXT: GIR_BuildRootMI, /*Opcode*/GIMT_Encode2(MyTarget::tst2),
+// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/0, // DstI[rd]
+// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/2, // rs1
+// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/3, // rs2
+// CHECK-NEXT: GIR_SetImplicitDefDead, /*InsnID*/0, /*OpIdx for MyTarget::R1*/1,
+// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::COPY),
+// CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/1, // DstI[R0]
+// CHECK-NEXT: GIR_AddRegister, /*InsnID*/1, GIMT_Encode2(MyTarget::R0), /*AddRegisterRegFlags*/GIMT_Encode2(0),
+// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, GIMT_Encode2(MyTarget::GPR32RegClassID),
+// CHECK-NEXT: GIR_RootConstrainSelectedInstOperands,
+// CHECK-NEXT: // GIR_Coverage, 3,
+// CHECK-NEXT: GIR_EraseRootFromParent_Done,
+
+// CHECK-LABEL: // (udivrem:{ *:[i32] }:{ *:[i32] } i32:{ *:[i32] }:$rs1, i32:{ *:[i32] }:$rs2)  =>  (tst2:{ *:[i32] }:{ *:[i32] } i32:{ *:[i32] }:$rs1, i32:{ *:[i32] }:$rs2)
+// CHECK-NEXT: GIR_BuildRootMI, /*Opcode*/GIMT_Encode2(MyTarget::tst2),
+// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/0, // DstI[rd]
+// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/2, // rs1
+// CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/3, // rs2
+// CHECK-NEXT: GIR_SetImplicitDefDead, /*InsnID*/0, /*OpIdx for MyTarget::R1*/1,
+// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::COPY),
+// CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/1, // DstI[R0]
+// CHECK-NEXT: GIR_AddRegister, /*InsnID*/1, GIMT_Encode2(MyTarget::R0), /*AddRegisterRegFlags*/GIMT_Encode2(0),
+// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, GIMT_Encode2(MyTarget::GPR32RegClassID),
+// CHECK-NEXT: GIR_RootConstrainSelectedInstOperands,
+// CHECK-NEXT: // GIR_Coverage, 1,
+// CHECK-NEXT: GIR_EraseRootFromParent_Done,
+
+// CHECK-LABEL: // (get_fpenv:{ *:[i32] })  =>  (tst1:{ *:[i32] })
+// CHECK-NEXT: GIR_BuildRootMI, /*Opcode*/GIMT_Encode2(MyTarget::tst1),
+// CHECK-NEXT: GIR_SetImplicitDefDead, /*InsnID*/0, /*OpIdx for MyTarget::R1*/1,
+// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::COPY),
+// CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/0, // DstI[R0]
+// CHECK-NEXT: GIR_AddRegister, /*InsnID*/1, GIMT_Encode2(MyTarget::R0), /*AddRegisterRegFlags*/GIMT_Encode2(0),
+// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, GIMT_Encode2(MyTarget::GPR32RegClassID),
+// CHECK-NEXT: GIR_RootConstrainSelectedInstOperands,
+// CHECK-NEXT: // GIR_Coverage, 0,
+// CHECK-NEXT: GIR_EraseRootFromParent_Done,
diff --git a/llvm/test/TableGen/GlobalISelEmitter-nested-subregs.td b/llvm/test/TableGen/GlobalISelEmitter-nested-subregs.td
index 1fdb973c1f1ec7..79e55ef2e8b8ce 100644
--- a/llvm/test/TableGen/GlobalISelEmitter-nested-subregs.td
+++ b/llvm/test/TableGen/GlobalISelEmitter-nested-subregs.td
@@ -38,11 +38,11 @@ def A0  : RegisterClass<"MyTarget", [i32], 32, (add a0)>;
 // CHECK-NEXT: // MIs[0] src
 // CHECK-NEXT: GIM_RootCheckType, /*Op*/1, /*Type*/GILLT_s8,
 // CHECK-NEXT: // (anyext:{ *:[i16] } i8:{ *:[i8] }:$src)  =>  (EXTRACT_SUBREG:{ *:[i16] } (INSERT_SUBREG:{ *:[i32] } (IMPLICIT_DEF:{ *:[i32] }), A0b:{ *:[i8] }:$src, lo8:{ *:[i32] }), lo16:{ *:[i32] })
-// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
 // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s32,
 // CHECK-NEXT: GIR_BuildMI, /*InsnID*/2, /*Opcode*/GIMT_Encode2(TargetOpcode::IMPLICIT_DEF),
 // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
 // CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/2,
+// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
 // CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::INSERT_SUBREG),
 // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
 // CHECK-NEXT: GIR_AddSimpleTempRegister, /*InsnID*/1, /*TempRegID*/1,
diff --git a/llvm/test/TableGen/GlobalISelEmitterRegSequence.td b/llvm/test/TableGen/GlobalISelEmitterRegSequence.td
index 3829070b28efeb..69f82eac49c161 100644
--- a/llvm/test/TableGen/GlobalISelEmitterRegSequence.td
+++ b/llvm/test/TableGen/GlobalISelEmitterRegSequence.td
@@ -39,12 +39,12 @@ def SUBSOME_INSN : I<(outs SRegs:$dst), (ins SOP:$src), []>;
 // CHECK-NEXT: GIM_RootCheckType, /*Op*/1, /*Type*/GILLT_s16,
 // CHECK-NEXT: GIM_RootCheckRegBankForClass, /*Op*/1, /*RC*/GIMT_Encode2(Test::SRegsRegClassID),
 // CHECK-NEXT: // (sext:{ *:[i32] } SOP:{ *:[i16] }:$src)  =>  (REG_SEQUENCE:{ *:[i32] } DRegs:{ *:[i32] }, (SUBSOME_INSN:{ *:[i16] } SOP:{ *:[i16] }:$src), sub0:{ *:[i32] }, (SUBSOME_INSN:{ *:[i16] } SOP:{ *:[i16] }:$src), sub1:{ *:[i32] })
-// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s16,
 // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s16,
 // CHECK-NEXT: GIR_BuildMI, /*InsnID*/2, /*Opcode*/GIMT_Encode2(MyTarget::SUBSOME_INSN),
 // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
 // CHECK-NEXT: GIR_Copy, /*NewInsnID*/2, /*OldInsnID*/0, /*OpIdx*/1, // src
 // CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/2,
+// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s16,
 // CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(MyTarget::SUBSOME_INSN),
 // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
 // CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/1, // src
diff --git a/llvm/test/TableGen/GlobalISelEmitterSubreg.td b/llvm/test/TableGen/GlobalISelEmitterSubreg.td
index 8df3238f6cc21e..08e690f3e894de 100644
--- a/llvm/test/TableGen/GlobalISelEmitterSubreg.td
+++ b/llvm/test/TableGen/GlobalISelEmitterSubreg.td
@@ -59,13 +59,13 @@ def : Pat<(sub (complex DOP:$src1, DOP:$src2), 77),
           (SOME_INSN2 (EXTRACT_SUBREG DOP:$src1, sub0),
                       (EXTRACT_SUBREG DOP:$src2, sub1))>;
 // CHECK-LABEL: // (sub:{ *:[i32] } (complex:{ *:[i32] } DOP:{ *:[i32] }:$src1, DOP:{ *:[i32] }:$src2), 77:{ *:[i32] })  =>  (SOME_INSN2:{ *:[i32] } (EXTRACT_SUBREG:{ *:[i32] } DOP:{ *:[i32] }:$src1, sub0:{ *:[i32] }), (EXTRACT_SUBREG:{ *:[i32] } DOP:{ *:[i32] }:$src2, sub1:{ *:[i32] }))
-// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
 // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s32,
 // CHECK-NEXT: GIR_BuildMI, /*InsnID*/2, /*Opcode*/GIMT_Encode2(TargetOpcode::COPY),
 // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
 // CHECK-NEXT: GIR_ComplexSubOperandSubRegRenderer, /*InsnID*/2, /*RendererID*/GIMT_Encode2(0), /*SubOperand*/1, /*SubRegIdx*/GIMT_Encode2(2), // src2
 // CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/2, /*Op*/0, GIMT_Encode2(Test::SRegsRegClassID),
 // CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/2, /*Op*/1, GIMT_Encode2(Test::DRegsRegClassID),
+// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
 // CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::COPY),
 // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
 // CHECK-NEXT: GIR_ComplexSubOperandSubRegRenderer, /*InsnID*/1, /*RendererID*/GIMT_Encode2(0), /*SubOperand*/0, /*SubRegIdx*/GIMT_Encode2(1), // src1
@@ -103,11 +103,11 @@ def : Pat<(i32 (anyext i16:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SOP:$src
 // instruction.
 def : Pat<(i32 (anyext i16:$src)), (SOME_INSN (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SOP:$src, sub0))>;
 // CHECK-LABEL:  (anyext:{ *:[i32] } i16:{ *:[i16] }:$src)  =>  (SOME_INSN:{ *:[i32] } (INSERT_SUBREG:{ *:[i32] } (IMPLICIT_DEF:{ *:[i32] }), SOP:{ *:[i16] }:$src, sub0:{ *:[i32] }))
-// CHECK-NEXT:            GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
 // CHECK-NEXT:            GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s32,
 // CHECK-NEXT:            GIR_BuildMI, /*InsnID*/2, /*Opcode*/GIMT_Encode2(TargetOpcode::IMPLICIT_DEF),
 // CHECK-NEXT:            GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
 // CHECK-NEXT:            GIR_ConstrainSelectedInstOperands, /*InsnID*/2,
+// CHECK-NEXT:            GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
 // CHECK-NEXT:            GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::INSERT_SUBREG),
 // CHECK-NEXT:            GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
 // CHECK-NEXT:            GIR_AddSimpleTempRegister, /*InsnID*/1, /*TempRegID*/1,
@@ -138,12 +138,12 @@ def : Pat<(i32 (anyext i16:$src)), (INSERT_SUBREG (i32 (COPY_TO_REGCLASS SOP:$sr
 // by a subinstruction.
 def : Pat<(i32 (anyext i16:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), (SUBSOME_INSN SOP:$src), sub0)>;
 // CHECK-LABEL:  (anyext:{ *:[i32] } i16:{ *:[i16] }:$src)  =>  (INSERT_SUBREG:{ *:[i32] } (IMPLICIT_DEF:{ *:[i32] }), (SUBSOME_INSN:{ *:[i16] } SOP:{ *:[i16] }:$src), sub0:{ *:[i32] })
-// CHECK-NEXT:          GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
 // CHECK-NEXT:          GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s16,
 // CHECK-NEXT:          GIR_BuildMI, /*InsnID*/2, /*Opcode*/GIMT_Encode2(MyTarget::SUBSOME_INSN),
 // CHECK-NEXT:          GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
 // CHECK-NEXT:          GIR_Copy, /*NewInsnID*/2, /*OldInsnID*/0, /*OpIdx*/1, // src
 // CHECK-NEXT:          GIR_ConstrainSelectedInstOperands, /*InsnID*/2,
+// CHECK-NEXT:          GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s32,
 // CHECK-NEXT:          GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::IMPLICIT_DEF),
 // CHECK-NEXT:          GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
 // CHECK-NEXT:          GIR_ConstrainSelectedInstOperands, /*InsnID*/1,
@@ -200,12 +200,12 @@ def : Pat<(i16 (trunc (bitreverse DOP:$src))),
 // CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/1, /*Op*/1, /*RC*/GIMT_Encode2(Test::DRegsRegClassID),
 // CHECK-NEXT: GIM_CheckIsSafeToFold, /*NumInsns*/1,
 // CHECK-NEXT: // (trunc:{ *:[i16] } (ctpop:{ *:[i32] } DOP:{ *:[i32] }:$src))  =>  (SUBSOME_INSN2:{ *:[i16] } (EXTRACT_SUBREG:{ *:[i16] } (SOME_INSN:{ *:[i32] } DOP:{ *:[i32] }:$src), sub0:{ *:[i32] }))
-// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s16,
 // CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s32,
 // CHECK-NEXT: GIR_BuildMI, /*InsnID*/2, /*Opcode*/GIMT_Encode2(MyTarget::SOME_INSN),
 // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
 // CHECK-NEXT: GIR_Copy, /*NewInsnID*/2, /*OldInsnID*/1, /*OpIdx*/1, // src
 // CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/2,
+// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s16,
 // CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/GIMT_Encode2(TargetOpcode::COPY),
 // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/GIMT_Encode2(RegState::Define),
 // CHECK-NEXT: GIR_AddTempSubRegister, /*InsnID*/1, /*TempRegID*/1, /*TempRegFlags*/GIMT_Encode2(0), GIMT_Encode2(sub0),
diff --git a/llvm/utils/TableGen/Common/CodeGenRegisters.cpp b/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
index 2dbee94d7e5406..011d11184c70c7 100644
--- a/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
@@ -2494,7 +2494,7 @@ CodeGenRegBank::getRegClassForRegister(const Record *R) {
 
 const CodeGenRegisterClass *
 CodeGenRegBank::getMinimalPhysRegClass(const Record *RegRecord,
-                                       ValueTypeByHwMode *VT) {
+                                       const ValueTypeByHwMode *VT) {
   const CodeGenRegister *Reg = getReg(RegRecord);
   const CodeGenRegisterClass *BestRC = nullptr;
   for (const auto &RC : getRegClasses()) {
diff --git a/llvm/utils/TableGen/Common/CodeGenRegisters.h b/llvm/utils/TableGen/Common/CodeGenRegisters.h
index 2fa6cab2afb892..90489cae6164ba 100644
--- a/llvm/utils/TableGen/Common/CodeGenRegisters.h
+++ b/llvm/utils/TableGen/Common/CodeGenRegisters.h
@@ -792,7 +792,7 @@ class CodeGenRegBank {
   // with a matching type
   const CodeGenRegisterClass *
   getMinimalPhysRegClass(const Record *RegRecord,
-                         ValueTypeByHwMode *VT = nullptr);
+                         const ValueTypeByHwMode *VT = nullptr);
 
   // Get the sum of unit weights.
   unsigned getRegUnitSetWeight(const std::vector<unsigned> &Units) const {
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
index 15ec7e17130de4..6039211bc6cf00 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
@@ -1994,7 +1994,8 @@ void AddRegisterRenderer::emitRenderOpcodes(MatchTable &Table,
   // really needed for a physical register reference. We can pack the
   // register and flags in a single field.
   if (IsDef)
-    Table << MatchTable::NamedValue(2, "RegState::Define");
+    Table << MatchTable::NamedValue(
+        2, IsDead ? "RegState::Define | RegState::Dead" : "RegState::Define");
   else
     Table << MatchTable::IntValue(2, 0);
   Table << MatchTable::LineBreak;
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
index 00fe073057c5c9..48ce71be677c08 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
@@ -2091,13 +2091,15 @@ class AddRegisterRenderer : public OperandRenderer {
   unsigned InsnID;
   const Record *RegisterDef;
   bool IsDef;
+  bool IsDead;
   const CodeGenTarget &Target;
 
 public:
   AddRegisterRenderer(unsigned InsnID, const CodeGenTarget &Target,
-                      const Record *RegisterDef, bool IsDef = false)
+                      const Record *RegisterDef, bool IsDef = false,
+                      bool IsDead = false)
       : OperandRenderer(OR_Register), InsnID(InsnID), RegisterDef(RegisterDef),
-        IsDef(IsDef), Target(Target) {}
+        IsDef(IsDef), IsDead(IsDead), Target(Target) {}
 
   static bool classof(const OperandRenderer *R) {
     return R->getKind() == OR_Register;
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 84f23985b64213..4fa64248878b40 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -324,8 +324,6 @@ class GlobalISelEmitter final : public GlobalISelMatchTableExecutorEmitter {
   void emitTestSimplePredicate(raw_ostream &OS) override;
   void emitRunCustomAction(raw_ostream &OS) override;
 
-  void postProcessRule(RuleMatcher &M);
-
   const CodeGenTarget &getTarget() const override { return Target; }
   StringRef getClassName() const override { return ClassName; }
 
@@ -384,44 +382,41 @@ class GlobalISelEmitter final : public GlobalISelMatchTableExecutorEmitter {
 
   Error importRulePredicates(RuleMatcher &M,
                              ArrayRef<const Record *> Predicates);
-  Expected<InstructionMatcher &>
-  createAndImportSelDAGMatcher(RuleMatcher &Rule,
-                               InstructionMatcher &InsnMatcher,
-                               const TreePatternNode &Src, unsigned &TempOpIdx);
+  Expected<InstructionMatcher &> createAndImportSelDAGMatcher(
+      RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
+      const PatternToMatch &P, const TreePatternNode &Src, unsigned &TempOpIdx);
   Error importComplexPatternOperandMatcher(OperandMatcher &OM, const Record *R,
                                            unsigned &TempOpIdx) const;
   Error importChildMatcher(RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
+                           const PatternToMatch &P,
                            const TreePatternNode &SrcChild,
                            bool OperandIsAPointer, bool OperandIsImmArg,
                            unsigned OpIdx, unsigned &TempOpIdx);
 
   Expected<BuildMIAction &> createAndImportInstructionRenderer(
-      RuleMatcher &M, InstructionMatcher &InsnMatcher,
+      RuleMatcher &M, InstructionMatcher &InsnMatcher, const PatternToMatch &P,
       const TreePatternNode &Src, const TreePatternNode &Dst);
   Expected<action_iterator> createAndImportSubInstructionRenderer(
-      action_iterator InsertPt, RuleMatcher &M, const TreePatternNode &Dst,
-      const TreePatternNode &Src, unsigned TempReg);
+      action_iterator InsertPt, RuleMatcher &M, const PatternToMatch &P,
+      const TreePatternNode &Dst, unsigned TempReg);
   Expected<action_iterator>
   createInstructionRenderer(action_iterator InsertPt, RuleMatcher &M,
                             const TreePatternNode &Dst);
 
   Expected<action_iterator>
-  importExplicitDefRenderers(action_iterator InsertPt, RuleMatcher &M,
-                             BuildMIAction &DstMIBuilder,
-                             const TreePatternNode &Src,
-                             const TreePatternNode &Dst, unsigned Start = 0);
+  importDefRenderers(action_iterator InsertPt, RuleMatcher &M,
+                     BuildMIAction &DstMIBuilder, const PatternToMatch &P,
+                     const TreePatternNode &Dst, unsigned Start = 0);
 
   Expected<action_iterator> importExplicitUseRenderers(
       action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
-      const llvm::TreePatternNode &Dst, const TreePatternNode &Src);
+      const PatternToMatch &P, const TreePatternNode &Dst);
   Expected<action_iterator> importExplicitUseRenderer(
       action_iterator InsertPt, RuleMatcher &Rule, BuildMIAction &DstMIBuilder,
-      const TreePatternNode &DstChild, const TreePatternNode &Src);
+      const PatternToMatch &P, const TreePatternNode &Dst);
   Error importDefaultOperandRenderers(action_iterator InsertPt, RuleMatcher &M,
                                       BuildMIAction &DstMIBuilder,
                                       const DAGDefaultOperand &DefaultOp) const;
-  Error importImplicitDefRenderers(BuildMIAction &DstMIBuilder,
-                                   ArrayRef<const Record *> ImplicitDefs) const;
 
   /// Analyze pattern \p P, returning a matcher for it if possible.
   /// Otherwise, return an Error explaining why we don't support it.
@@ -725,7 +720,7 @@ Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
 }
 
 Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
-    RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
+    RuleMatcher &Rule, InstructionMatcher &InsnMatcher, const PatternToMatch &P,
     const TreePatternNode &Src, unsigned &TempOpIdx) {
   const auto SavedFlags = Rule.setGISelFlags(Src.getGISelFlagsRecord());
 
@@ -925,9 +920,9 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
         OperandIsImmArg |= II->isParamImmArg(I - 1);
       }
 
-      if (auto Error =
-              importChildMatcher(Rule, InsnMatcher, SrcChild, OperandIsAPointer,
-                                 OperandIsImmArg, OpIdx++, TempOpIdx))
+      if (auto Error = importChildMatcher(Rule, InsnMatcher, P, SrcChild,
+                                          OperandIsAPointer, OperandIsImmArg,
+                                          OpIdx++, TempOpIdx))
         return std::move(Error);
     }
   }
@@ -966,7 +961,7 @@ static StringRef getSrcChildName(const TreePatternNode &SrcChild,
 }
 
 Error GlobalISelEmitter::importChildMatcher(
-    RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
+    RuleMatcher &Rule, InstructionMatcher &InsnMatcher, const PatternToMatch &P,
     const TreePatternNode &SrcChild, bool OperandIsAPointer,
     bool OperandIsImmArg, unsigned OpIdx, unsigned &TempOpIdx) {
 
@@ -1087,7 +1082,7 @@ Error GlobalISelEmitter::importChildMatcher(
     // Map the node to a gMIR instruction.
     InstructionOperandMatcher &InsnOperand = **MaybeInsnOperand;
     auto InsnMatcherOrError = createAndImportSelDAGMatcher(
-        Rule, InsnOperand.getInsnMatcher(), SrcChild, TempOpIdx);
+        Rule, InsnOperand.getInsnMatcher(), P, SrcChild, TempOpIdx);
     if (auto Error = InsnMatcherOrError.takeError())
       return Error;
 
@@ -1178,8 +1173,8 @@ Error GlobalISelEmitter::importChildMatcher(
       // has to succeed.
       OperandMatcher &OM =
           InsnOperand.getInsnMatcher().addOperand(0, "", TempOpIdx);
-      if (auto Error =
-              OM.addTypeCheckPredicate(TypeSetByHwMode(VTy), false /* OperandIsAPointer */))
+      if (auto Error = OM.addTypeCheckPredicate(TypeSetByHwMode(VTy),
+                                                false /* OperandIsAPointer */))
         return failedImport(toString(std::move(Error)) +
                             " for result of Src pattern operator");
 
@@ -1198,23 +1193,22 @@ Error GlobalISelEmitter::importChildMatcher(
 
 Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
     action_iterator InsertPt, RuleMatcher &Rule, BuildMIAction &DstMIBuilder,
-    const TreePatternNode &DstChild, const TreePatternNode &Src) {
+    const PatternToMatch &P, const TreePatternNode &Dst) {
 
-  const auto &SubOperand = Rule.getComplexSubOperand(DstChild.getName());
+  const auto &SubOperand = Rule.getComplexSubOperand(Dst.getName());
   if (SubOperand) {
     DstMIBuilder.addRenderer<RenderComplexPatternOperand>(
-        *std::get<0>(*SubOperand), DstChild.getName(), std::get<1>(*SubOperand),
+        *std::get<0>(*SubOperand), Dst.getName(), std::get<1>(*SubOperand),
         std::get<2>(*SubOperand));
     return InsertPt;
   }
 
-  if (!DstChild.isLeaf()) {
-    if (DstChild.getOperator()->isSubClassOf("SDNodeXForm")) {
-      auto &Child = DstChild.getChild(0);
-      auto I = SDNodeXFormEquivs.find(DstChild.getOperator());
+  if (!Dst.isLeaf()) {
+    if (Dst.getOperator()->isSubClassOf("SDNodeXForm")) {
+      auto &Child = Dst.getChild(0);
+      auto I = SDNodeXFormEquivs.find(Dst.getOperator());
       if (I != SDNodeXFormEquivs.end()) {
-        const Record *XFormOpc =
-            DstChild.getOperator()->getValueAsDef("Opcode");
+        const Record *XFormOpc = Dst.getOperator()->getValueAsDef("Opcode");
         if (XFormOpc->getName() == "timm") {
           // If this is a TargetConstant, there won't be a corresponding
           // instruction to transform. Instead, this will refer directly to an
@@ -1233,10 +1227,10 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
 
     // We accept 'bb' here. It's an operator because BasicBlockSDNode isn't
     // inline, but in MI it's just another operand.
-    if (DstChild.getOperator()->isSubClassOf("SDNode")) {
-      auto &ChildSDNI = CGP.getSDNodeInfo(DstChild.getOperator());
+    if (Dst.getOperator()->isSubClassOf("SDNode")) {
+      auto &ChildSDNI = CGP.getSDNodeInfo(Dst.getOperator());
       if (ChildSDNI.getSDClassName() == "BasicBlockSDNode") {
-        DstMIBuilder.addRenderer<CopyRenderer>(DstChild.getName());
+        DstMIBuilder.addRenderer<CopyRenderer>(Dst.getName());
         return InsertPt;
       }
     }
@@ -1245,26 +1239,25 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
     // rendered as operands.
     // FIXME: The target should be able to choose sign-extended when appropriate
     //        (e.g. on Mips).
-    if (DstChild.getOperator()->getName() == "timm") {
-      DstMIBuilder.addRenderer<CopyRenderer>(DstChild.getName());
+    if (Dst.getOperator()->getName() == "timm") {
+      DstMIBuilder.addRenderer<CopyRenderer>(Dst.getName());
       return InsertPt;
     }
-    if (DstChild.getOperator()->getName() == "tframeindex") {
-      DstMIBuilder.addRenderer<CopyRenderer>(DstChild.getName());
+    if (Dst.getOperator()->getName() == "tframeindex") {
+      DstMIBuilder.addRenderer<CopyRenderer>(Dst.getName());
       return InsertPt;
     }
-    if (DstChild.getOperator()->getName() == "imm") {
-      DstMIBuilder.addRenderer<CopyConstantAsImmRenderer>(DstChild.getName());
+    if (Dst.getOperator()->getName() == "imm") {
+      DstMIBuilder.addRenderer<CopyConstantAsImmRenderer>(Dst.getName());
       return InsertPt;
     }
-    if (DstChild.getOperator()->getName() == "fpimm") {
-      DstMIBuilder.addRenderer<CopyFConstantAsFPImmRenderer>(
-          DstChild.getName());
+    if (Dst.getOperator()->getName() == "fpimm") {
+      DstMIBuilder.addRenderer<CopyFConstantAsFPImmRenderer>(Dst.getName());
       return InsertPt;
     }
 
-    if (DstChild.getOperator()->isSubClassOf("Instruction")) {
-      auto OpTy = getInstResultType(DstChild, Target);
+    if (Dst.getOperator()->isSubClassOf("Instruction")) {
+      auto OpTy = getInstResultType(Dst, Target);
       if (!OpTy)
         return OpTy.takeError();
 
@@ -1274,29 +1267,28 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
       DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
 
       auto InsertPtOrError = createAndImportSubInstructionRenderer(
-          ++InsertPt, Rule, DstChild, Src, TempRegID);
+          ++InsertPt, Rule, P, Dst, TempRegID);
       if (auto Error = InsertPtOrError.takeError())
         return std::move(Error);
       return InsertPtOrError.get();
     }
 
     return failedImport("Dst pattern child isn't a leaf node or an MBB" +
-                        llvm::to_string(DstChild));
+                        llvm::to_string(Dst));
   }
 
   // It could be a specific immediate in which case we should just check for
   // that immediate.
-  if (const IntInit *ChildIntInit =
-          dyn_cast<IntInit>(DstChild.getLeafValue())) {
+  if (const IntInit *ChildIntInit = dyn_cast<IntInit>(Dst.getLeafValue())) {
     DstMIBuilder.addRenderer<ImmRenderer>(ChildIntInit->getValue());
     return InsertPt;
   }
 
   // Otherwise, we're looking for a bog-standard RegisterClass operand.
-  if (auto *ChildDefInit = dyn_cast<DefInit>(DstChild.getLeafValue())) {
+  if (auto *ChildDefInit = dyn_cast<DefInit>(Dst.getLeafValue())) {
     auto *ChildRec = ChildDefInit->getDef();
 
-    ArrayRef<TypeSetByHwMode> ChildTypes = DstChild.getExtTypes();
+    ArrayRef<TypeSetByHwMode> ChildTypes = Dst.getExtTypes();
     if (ChildTypes.size() != 1)
       return failedImport("Dst pattern child has multiple results");
 
@@ -1317,11 +1309,11 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
       if (ChildRec->isSubClassOf("RegisterOperand") &&
           !ChildRec->isValueUnset("GIZeroRegister")) {
         DstMIBuilder.addRenderer<CopyOrAddZeroRegRenderer>(
-            DstChild.getName(), ChildRec->getValueAsDef("GIZeroRegister"));
+            Dst.getName(), ChildRec->getValueAsDef("GIZeroRegister"));
         return InsertPt;
       }
 
-      DstMIBuilder.addRenderer<CopyRenderer>(DstChild.getName());
+      DstMIBuilder.addRenderer<CopyRenderer>(Dst.getName());
       return InsertPt;
     }
 
@@ -1337,9 +1329,9 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
         return failedImport(
             "SelectionDAG ComplexPattern not mapped to GlobalISel");
 
-      const OperandMatcher &OM = Rule.getOperandMatcher(DstChild.getName());
+      const OperandMatcher &OM = Rule.getOperandMatcher(Dst.getName());
       DstMIBuilder.addRenderer<RenderComplexPatternOperand>(
-          *ComplexPattern->second, DstChild.getName(),
+          *ComplexPattern->second, Dst.getName(),
           OM.getAllocatedTemporariesBaseID());
       return InsertPt;
     }
@@ -1350,17 +1342,16 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
 
   // Handle the case where the MVT/register class is omitted in the dest pattern
   // but MVT exists in the source pattern.
-  if (isa<UnsetInit>(DstChild.getLeafValue()) &&
-      Rule.hasOperand(DstChild.getName())) {
-    DstMIBuilder.addRenderer<CopyRenderer>(DstChild.getName());
+  if (isa<UnsetInit>(Dst.getLeafValue()) && Rule.hasOperand(Dst.getName())) {
+    DstMIBuilder.addRenderer<CopyRenderer>(Dst.getName());
     return InsertPt;
   }
   return failedImport("Dst pattern child is an unsupported kind");
 }
 
 Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
-    RuleMatcher &M, InstructionMatcher &InsnMatcher, const TreePatternNode &Src,
-    const TreePatternNode &Dst) {
+    RuleMatcher &M, InstructionMatcher &InsnMatcher, const PatternToMatch &P,
+    const TreePatternNode &Src, const TreePatternNode &Dst) {
   auto InsertPtOrError = createInstructionRenderer(M.actions_end(), M, Dst);
   if (auto Error = InsertPtOrError.takeError())
     return std::move(Error);
@@ -1380,13 +1371,11 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
   }
 
   if (auto Error =
-          importExplicitDefRenderers(InsertPt, M, DstMIBuilder, Src, Dst)
-              .takeError())
+          importDefRenderers(InsertPt, M, DstMIBuilder, P, Dst).takeError())
     return std::move(Error);
 
-  if (auto Error =
-          importExplicitUseRenderers(InsertPt, M, DstMIBuilder, Dst, Src)
-              .takeError())
+  if (auto Error = importExplicitUseRenderers(InsertPt, M, DstMIBuilder, P, Dst)
+                       .takeError())
     return std::move(Error);
 
   return DstMIBuilder;
@@ -1394,8 +1383,8 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
 
 Expected<action_iterator>
 GlobalISelEmitter::createAndImportSubInstructionRenderer(
-    const action_iterator InsertPt, RuleMatcher &M, const TreePatternNode &Dst,
-    const TreePatternNode &Src, unsigned TempRegID) {
+    const action_iterator InsertPt, RuleMatcher &M, const PatternToMatch &P,
+    const TreePatternNode &Dst, unsigned TempRegID) {
   auto InsertPtOrError = createInstructionRenderer(InsertPt, M, Dst);
 
   // TODO: Assert there's exactly one result.
@@ -1410,15 +1399,13 @@ GlobalISelEmitter::createAndImportSubInstructionRenderer(
   DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID, true);
 
   // Handle additional (ignored) results.
-  if (DstMIBuilder.getCGI()->Operands.NumDefs > 1) {
-    InsertPtOrError = importExplicitDefRenderers(
-        std::prev(*InsertPtOrError), M, DstMIBuilder, Src, Dst, /*Start=*/1);
-    if (auto Error = InsertPtOrError.takeError())
-      return std::move(Error);
-  }
+  InsertPtOrError = importDefRenderers(std::prev(*InsertPtOrError), M,
+                                       DstMIBuilder, P, Dst, /*Start=*/1);
+  if (auto Error = InsertPtOrError.takeError())
+    return std::move(Error);
 
   InsertPtOrError = importExplicitUseRenderers(InsertPtOrError.get(), M,
-                                               DstMIBuilder, Dst, Src);
+                                               DstMIBuilder, P, Dst);
   if (auto Error = InsertPtOrError.takeError())
     return std::move(Error);
 
@@ -1450,29 +1437,39 @@ Expected<action_iterator> GlobalISelEmitter::createInstructionRenderer(
                                        DstI);
 }
 
-Expected<action_iterator> GlobalISelEmitter::importExplicitDefRenderers(
+Expected<action_iterator> GlobalISelEmitter::importDefRenderers(
     action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
-    const TreePatternNode &Src, const TreePatternNode &Dst, unsigned Start) {
+    const PatternToMatch &P, const TreePatternNode &Dst, unsigned Start) {
   const CodeGenInstruction *DstI = DstMIBuilder.getCGI();
-  const unsigned SrcNumDefs = Src.getExtTypes().size();
-  const unsigned DstNumDefs = DstI->Operands.NumDefs;
-  if (DstNumDefs == 0)
-    return InsertPt;
-
-  for (unsigned I = Start; I < SrcNumDefs; ++I) {
-    std::string OpName = getMangledRootDefName(DstI->Operands[I].Name);
-    // CopyRenderer saves a StringRef, so cannot pass OpName itself -
-    // let's use a string with an appropriate lifetime.
-    StringRef PermanentRef = M.getOperandMatcher(OpName).getSymbolicName();
-    DstMIBuilder.addRenderer<CopyRenderer>(PermanentRef);
-  }
+  const unsigned DstExpDefs = DstI->Operands.NumDefs;
+  const unsigned DstNumDefs = DstExpDefs + DstI->ImplicitDefs.size();
+  bool IsRoot = &Dst == &P.getDstPattern();
+
+  unsigned I = Start;
+  for (; I < DstExpDefs; ++I) {
+    const CGIOperandList::OperandInfo &OpInfo = DstI->Operands[I];
+    std::string OpName = getMangledRootDefName(OpInfo.Name);
+
+    if (M.hasOperand(OpName)) {
+      // CopyRenderer saves a StringRef, so cannot pass OpName itself -
+      // let's use a string with an appropriate lifetime.
+      StringRef PermanentRef = M.getOperandMatcher(OpName).getSymbolicName();
+      DstMIBuilder.addRenderer<CopyRenderer>(PermanentRef);
+      continue;
+    }
 
-  // Some instructions have multiple defs, but are missing a type entry
-  // (e.g. s_cc_out operands).
-  if (Dst.getExtTypes().size() < DstNumDefs)
-    return failedImport("unhandled discarded def");
+    if (OpInfo.Rec->isSubClassOf("OptionalDefOperand")) {
+      const DAGDefaultOperand &ComplexOp = CGP.getDefaultOperand(OpInfo.Rec);
+      for (const TreePatternNode &SubOp :
+           make_pointee_range(ComplexOp.DefaultOps)) {
+        const Record *Reg = cast<DefInit>(SubOp.getLeafValue())->getDef();
+        assert(Reg->isSubClassOf("Register"));
+        DstMIBuilder.addRenderer<AddRegisterRenderer>(
+            Target, Reg, /*IsDef=*/true, /*IsDead=*/true);
+      }
+      continue;
+    }
 
-  for (unsigned I = SrcNumDefs; I < DstNumDefs; ++I) {
     const TypeSetByHwMode &ExtTy = Dst.getExtType(I);
     if (!ExtTy.isMachineValueType())
       return failedImport("unsupported typeset");
@@ -1484,7 +1481,30 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitDefRenderers(
     unsigned TempRegID = M.allocateTempRegID();
     InsertPt =
         M.insertAction<MakeTempRegisterAction>(InsertPt, *OpTy, TempRegID);
-    DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID, true, nullptr, true);
+    DstMIBuilder.addRenderer<TempRegRenderer>(
+        TempRegID, /*IsDef=*/true, /*SubReg=*/nullptr, /*IsDead=*/true);
+  }
+
+  for (; I < DstNumDefs; ++I) {
+    const Record *Reg = DstI->ImplicitDefs[I - DstExpDefs];
+    std::string OpName = getMangledRootDefName(Reg->getName());
+
+    if (!IsRoot || !M.hasOperand(OpName)) {
+      DstMIBuilder.setDeadImplicitDef(Reg);
+      continue;
+    }
+
+    BuildMIAction &CopyBuilder = M.addAction<BuildMIAction>(
+        M.allocateOutputInsnID(), &Target.getInstruction(RK.getDef("COPY")));
+
+    StringRef PermanentRef = M.getOperandMatcher(OpName).getSymbolicName();
+    CopyBuilder.addRenderer<CopyRenderer>(PermanentRef);
+    CopyBuilder.addRenderer<AddRegisterRenderer>(Target, Reg);
+
+    const CodeGenRegisterClass *RC = CGRegs.getRegClassForRegister(Reg);
+    assert(RC);
+    M.addAction<ConstrainOperandToRegClassAction>(CopyBuilder.getInsnID(),
+                                                  /*OpIdx=*/0, *RC);
   }
 
   return InsertPt;
@@ -1492,7 +1512,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitDefRenderers(
 
 Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
     action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
-    const llvm::TreePatternNode &Dst, const llvm::TreePatternNode &Src) {
+    const PatternToMatch &P, const TreePatternNode &Dst) {
   const CodeGenInstruction *DstI = DstMIBuilder.getCGI();
   CodeGenInstruction *OrigDstI = &Target.getInstruction(Dst.getOperator());
 
@@ -1522,7 +1542,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
                                                         TempRegID);
 
       auto InsertPtOrError = createAndImportSubInstructionRenderer(
-          ++InsertPt, M, ValChild, Src, TempRegID);
+          ++InsertPt, M, P, ValChild, TempRegID);
       if (auto Error = InsertPtOrError.takeError())
         return std::move(Error);
 
@@ -1580,7 +1600,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
         CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef());
 
         auto InsertPtOrError =
-            importExplicitUseRenderer(InsertPt, M, DstMIBuilder, ValChild, Src);
+            importExplicitUseRenderer(InsertPt, M, DstMIBuilder, P, ValChild);
         if (auto Error = InsertPtOrError.takeError())
           return std::move(Error);
         InsertPt = InsertPtOrError.get();
@@ -1649,7 +1669,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
     }
 
     auto InsertPtOrError = importExplicitUseRenderer(InsertPt, M, DstMIBuilder,
-                                                     Dst.getChild(Child), Src);
+                                                     P, Dst.getChild(Child));
     if (auto Error = InsertPtOrError.takeError())
       return std::move(Error);
     InsertPt = InsertPtOrError.get();
@@ -1707,13 +1727,6 @@ Error GlobalISelEmitter::importDefaultOperandRenderers(
   return Error::success();
 }
 
-Error GlobalISelEmitter::importImplicitDefRenderers(
-    BuildMIAction &DstMIBuilder, ArrayRef<const Record *> ImplicitDefs) const {
-  if (!ImplicitDefs.empty())
-    return failedImport("Pattern defines a physical register");
-  return Error::success();
-}
-
 Error GlobalISelEmitter::constrainOperands(action_iterator InsertPt,
                                            RuleMatcher &M, unsigned InsnID,
                                            const TreePatternNode &Dst) {
@@ -2039,7 +2052,7 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
   const auto SavedFlags = M.setGISelFlags(P.getSrcRecord());
 
   auto InsnMatcherOrError =
-      createAndImportSelDAGMatcher(M, InsnMatcherTemp, Src, TempOpIdx);
+      createAndImportSelDAGMatcher(M, InsnMatcherTemp, P, Src, TempOpIdx);
   if (auto Error = InsnMatcherOrError.takeError())
     return std::move(Error);
   InstructionMatcher &InsnMatcher = InsnMatcherOrError.get();
@@ -2090,13 +2103,14 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
   unsigned DstExpDefs = DstI.Operands.NumDefs,
            DstNumDefs = DstI.ImplicitDefs.size() + DstExpDefs,
            SrcNumDefs = Src.getExtTypes().size();
+
+  bool FoundNoUsePred = false;
   if (DstNumDefs < SrcNumDefs) {
     if (DstNumDefs != 0)
       return failedImport("Src pattern result has more defs than dst MI (" +
                           to_string(SrcNumDefs) + " def(s) vs " +
                           to_string(DstNumDefs) + " def(s))");
 
-    bool FoundNoUsePred = false;
     for (const auto &Pred : InsnMatcher.predicates()) {
       if ((FoundNoUsePred = isa<NoUsePredicateMatcher>(Pred.get())))
         break;
@@ -2109,15 +2123,24 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
 
   // The root of the match also has constraints on the register bank so that it
   // matches the result instruction.
-  unsigned N = std::min(DstExpDefs, SrcNumDefs);
-  for (unsigned I = 0; I < N; ++I) {
-    const auto &DstIOperand = DstI.Operands[I];
+  for (unsigned I = 0; I < SrcNumDefs; ++I) {
+    if (FoundNoUsePred)
+      continue;
 
     OperandMatcher &OM = InsnMatcher.getOperand(I);
+
+    if (I >= DstExpDefs) {
+      const Record *Reg = DstI.ImplicitDefs[I - DstExpDefs];
+      OM.setSymbolicName(getMangledRootDefName(Reg->getName()));
+      M.defineOperand(OM.getSymbolicName(), OM);
+      continue;
+    }
+
     // The operand names declared in the DstI instruction are unrelated to
     // those used in pattern's source and destination DAGs, so mangle the
     // former to prevent implicitly adding unexpected
     // GIM_CheckIsSameOperand predicates by the defineOperand method.
+    const CGIOperandList::OperandInfo &DstIOperand = DstI.Operands[I];
     OM.setSymbolicName(getMangledRootDefName(DstIOperand.Name));
     M.defineOperand(OM.getSymbolicName(), OM);
 
@@ -2130,16 +2153,11 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
   }
 
   auto DstMIBuilderOrError =
-      createAndImportInstructionRenderer(M, InsnMatcher, Src, Dst);
+      createAndImportInstructionRenderer(M, InsnMatcher, P, Src, Dst);
   if (auto Error = DstMIBuilderOrError.takeError())
     return std::move(Error);
   BuildMIAction &DstMIBuilder = DstMIBuilderOrError.get();
 
-  // Render the implicit defs.
-  // These are only added to the root of the result.
-  if (auto Error = importImplicitDefRenderers(DstMIBuilder, P.getDstRegs()))
-    return std::move(Error);
-
   DstMIBuilder.chooseInsnToMutate(M);
 
   // Constrain the registers to classes. This is normally derived from the
@@ -2176,15 +2194,15 @@ GlobalISelEmitter::buildMatchTable(MutableArrayRef<RuleMatcher> Rules,
       OpcodeOrder[Opcode] = CurrentOrdering++;
   }
 
-  llvm::stable_sort(InputRules, [&OpcodeOrder](const Matcher *A,
-                                               const Matcher *B) {
-    auto *L = static_cast<const RuleMatcher *>(A);
-    auto *R = static_cast<const RuleMatcher *>(B);
-    return std::tuple(OpcodeOrder[L->getOpcode()],
-                      L->insnmatchers_front().getNumOperandMatchers()) <
-           std::tuple(OpcodeOrder[R->getOpcode()],
-                      R->insnmatchers_front().getNumOperandMatchers());
-  });
+  llvm::stable_sort(
+      InputRules, [&OpcodeOrder](const Matcher *A, const Matcher *B) {
+        auto *L = static_cast<const RuleMatcher *>(A);
+        auto *R = static_cast<const RuleMatcher *>(B);
+        return std::tuple(OpcodeOrder[L->getOpcode()],
+                          L->insnmatchers_front().getNumOperandMatchers()) <
+               std::tuple(OpcodeOrder[R->getOpcode()],
+                          R->insnmatchers_front().getNumOperandMatchers());
+      });
 
   for (Matcher *Rule : InputRules)
     Rule->optimize();
@@ -2299,31 +2317,6 @@ void GlobalISelEmitter::emitRunCustomAction(raw_ostream &OS) {
      << "}\n";
 }
 
-void GlobalISelEmitter::postProcessRule(RuleMatcher &M) {
-  SmallPtrSet<const Record *, 16> UsedRegs;
-
-  // TODO: deal with subregs?
-  for (auto &A : M.actions()) {
-    auto *MI = dyn_cast<BuildMIAction>(A.get());
-    if (!MI)
-      continue;
-
-    for (auto *Use : MI->getCGI()->ImplicitUses)
-      UsedRegs.insert(Use);
-  }
-
-  for (auto &A : M.actions()) {
-    auto *MI = dyn_cast<BuildMIAction>(A.get());
-    if (!MI)
-      continue;
-
-    for (auto *Def : MI->getCGI()->ImplicitDefs) {
-      if (!UsedRegs.contains(Def))
-        MI->setDeadImplicitDef(Def);
-    }
-  }
-}
-
 void GlobalISelEmitter::run(raw_ostream &OS) {
   if (!UseCoverageFile.empty()) {
     RuleCoverage = CodeGenCoverage();
@@ -2383,7 +2376,6 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
                      "Pattern is not covered by a test");
     }
     Rules.push_back(std::move(MatcherOrErr.get()));
-    postProcessRule(Rules.back());
   }
 
   // Comparison function to order records by name.



More information about the llvm-commits mailing list