[llvm] 1941f34 - [TableGen][GISel] Import more "multi-level" patterns (#120332)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 18 03:45:00 PST 2024
Author: Sergei Barannikov
Date: 2024-12-18T14:44:55+03:00
New Revision: 1941f341722178390f71e07502e08a2250a704c7
URL: https://github.com/llvm/llvm-project/commit/1941f341722178390f71e07502e08a2250a704c7
DIFF: https://github.com/llvm/llvm-project/commit/1941f341722178390f71e07502e08a2250a704c7.diff
LOG: [TableGen][GISel] Import more "multi-level" patterns (#120332)
Previously, if the destination DAG has an untyped leaf, we would import
the pattern only if that leaf is defined by the *top-level* source DAG.
This is an unnecessary restriction.
Here is an example of such a pattern:
```
def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC),
(VMLADDUHM $vA, $vB, $vC)>;
```
Previously, it failed to import because `add` defines neither
`$vA` nor `$vB`.
This change reduces the number of skipped patterns as follows:
```
AArch64: 8695 -> 8548 (-147)
AMDGPU: 11333 -> 11240 (-93)
ARM: 4297 -> 4296 (-1)
PowerPC: 3955 -> 3010 (-945)
```
Other GISel-enabled targets are unaffected.
Added:
Modified:
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
llvm/test/CodeGen/AMDGPU/constrained-shift.ll
llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
llvm/utils/TableGen/GlobalISelEmitter.cpp
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir
index fb7c2d4d705e75..95d2bae98df2e1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir
@@ -274,24 +274,18 @@ body: |
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
- ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ASHRREV_I16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX9-LABEL: name: ashr_s16_vv_zext_to_s64
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
- ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ASHRREV_I16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-LABEL: name: ashr_s16_vv_zext_to_s64
; GFX10: liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir
index 779312596313a3..3a2ed71e4d2242 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir
@@ -79,9 +79,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY1]], 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = G_CTPOP %0
@@ -104,9 +103,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY1]], [[V_BCNT_U32_B32_e64_]], 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = G_CTPOP %0
@@ -155,9 +153,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY1]], 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s32) = G_CTPOP %0
@@ -181,9 +178,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], 0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY]], 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s32) = G_CTPOP %1
@@ -207,9 +203,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def dead $scc
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[S_BCNT1_I32_B32_]], [[COPY1]], 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_CTPOP %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir
index e7ec5fcbba2473..a96b574a647848 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir
@@ -272,24 +272,18 @@ body: |
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
- ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHRREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX9-LABEL: name: lshr_s16_vv_zext_to_s64
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
- ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHRREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-LABEL: name: lshr_s16_vv_zext_to_s64
; GFX10: liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir
index bcb6d75c18302b..b0703a642e033a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir
@@ -272,24 +272,18 @@ body: |
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
- ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec
- ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX9-LABEL: name: shl_s16_vv_zext_to_s64
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
- ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec
- ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-LABEL: name: shl_s16_vv_zext_to_s64
; GFX10: liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
index 6bb4e2d3dbe26e..ed85fb19d90517 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -204,18 +204,37 @@ define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b)
}
define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) {
-; GCN-LABEL: vector_xnor_i32_one_use:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
-; GCN-NEXT: v_not_b32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX7-LABEL: vector_xnor_i32_one_use:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: vector_xnor_i32_one_use:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: vector_xnor_i32_one_use:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: vector_xnor_i32_one_use:
+; GFX906: ; %bb.0: ; %entry
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
+; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xnor_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1
-; GFX10-NEXT: v_not_b32_e32 v0, v0
+; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%xor = xor i32 %a, %b
@@ -224,22 +243,45 @@ entry:
}
define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) {
-; GCN-LABEL: vector_xnor_i64_one_use:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v0, v0, v2
-; GCN-NEXT: v_xor_b32_e32 v1, v1, v3
-; GCN-NEXT: v_not_b32_e32 v0, v0
-; GCN-NEXT: v_not_b32_e32 v1, v1
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX7-LABEL: vector_xnor_i64_one_use:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_xor_b32_e32 v0, v0, v2
+; GFX7-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: v_not_b32_e32 v1, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: vector_xnor_i64_one_use:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_xor_b32_e32 v0, v0, v2
+; GFX8-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: v_not_b32_e32 v1, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: vector_xnor_i64_one_use:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_xor_b32_e32 v0, v0, v2
+; GFX900-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: v_not_b32_e32 v1, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: vector_xnor_i64_one_use:
+; GFX906: ; %bb.0: ; %entry
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v2
+; GFX906-NEXT: v_xnor_b32_e32 v1, v1, v3
+; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xnor_i64_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2
-; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX10-NEXT: v_not_b32_e32 v0, v0
-; GFX10-NEXT: v_not_b32_e32 v1, v1
+; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v2
+; GFX10-NEXT: v_xnor_b32_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%xor = xor i64 %a, %b
@@ -248,16 +290,32 @@ entry:
}
define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
-; GCN-LABEL: xnor_s_v_i32_one_use:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
-; GCN-NEXT: v_not_b32_e32 v0, v0
-; GCN-NEXT: ; return to shader part epilog
+; GFX7-LABEL: xnor_s_v_i32_one_use:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: xnor_s_v_i32_one_use:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX900-LABEL: xnor_s_v_i32_one_use:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: ; return to shader part epilog
+;
+; GFX906-LABEL: xnor_s_v_i32_one_use:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
+; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_s_v_i32_one_use:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT: v_not_b32_e32 v0, v0
+; GFX10-NEXT: v_xnor_b32_e32 v0, s0, v0
; GFX10-NEXT: ; return to shader part epilog
%xor = xor i32 %s, %v
%d = xor i32 %xor, -1
@@ -266,16 +324,32 @@ define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
}
define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) {
-; GCN-LABEL: xnor_v_s_i32_one_use:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
-; GCN-NEXT: v_not_b32_e32 v0, v0
-; GCN-NEXT: ; return to shader part epilog
+; GFX7-LABEL: xnor_v_s_i32_one_use:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: xnor_v_s_i32_one_use:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX900-LABEL: xnor_v_s_i32_one_use:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: ; return to shader part epilog
+;
+; GFX906-LABEL: xnor_v_s_i32_one_use:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
+; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_v_s_i32_one_use:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT: v_not_b32_e32 v0, v0
+; GFX10-NEXT: v_xnor_b32_e64 v0, v0, s0
; GFX10-NEXT: ; return to shader part epilog
%xor = xor i32 %v, %s
%d = xor i32 %xor, -1
@@ -314,19 +388,15 @@ define amdgpu_ps <2 x float> @xnor_i64_s_v_one_use(i64 inreg %a, i64 %b64) {
; GFX906-LABEL: xnor_i64_s_v_one_use:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX906-NEXT: v_not_b32_e32 v0, v0
-; GFX906-NEXT: v_not_b32_e32 v1, v1
+; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
+; GFX906-NEXT: v_xnor_b32_e32 v1, s1, v1
; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_i64_s_v_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX10-NEXT: v_not_b32_e32 v0, v0
-; GFX10-NEXT: v_not_b32_e32 v1, v1
+; GFX10-NEXT: v_xnor_b32_e32 v0, s0, v0
+; GFX10-NEXT: v_xnor_b32_e32 v1, s1, v1
; GFX10-NEXT: ; return to shader part epilog
entry:
%b = shl i64 %b64, 29
@@ -367,19 +437,15 @@ define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) {
; GFX906-LABEL: xnor_i64_v_s_one_use:
; GFX906: ; %bb.0:
; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX906-NEXT: v_not_b32_e32 v0, v0
-; GFX906-NEXT: v_not_b32_e32 v1, v1
+; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
+; GFX906-NEXT: v_xnor_b32_e64 v1, v1, s1
; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_i64_v_s_one_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX10-NEXT: v_not_b32_e32 v0, v0
-; GFX10-NEXT: v_not_b32_e32 v1, v1
+; GFX10-NEXT: v_xnor_b32_e64 v0, v0, s0
+; GFX10-NEXT: v_xnor_b32_e64 v1, v1, s1
; GFX10-NEXT: ; return to shader part epilog
%b = shl i64 %b64, 29
%xor = xor i64 %b, %a
@@ -419,7 +485,7 @@ define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) {
; GFX10-LABEL: vector_xor_na_b_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_xor3_b32 v0, v0, -1, v1
+; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%na = xor i32 %a, -1
@@ -458,7 +524,7 @@ define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) {
; GFX10-LABEL: vector_xor_a_nb_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_xor3_b32 v0, v1, -1, v0
+; GFX10-NEXT: v_xnor_b32_e32 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%nb = xor i32 %b, -1
diff --git a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
index 1b35a89ad7f935..4011c21af69046 100644
--- a/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
+++ b/llvm/test/CodeGen/AMDGPU/constrained-shift.ll
@@ -139,10 +139,6 @@ define <4 x i32> @csh_v4i32(<4 x i32> %a, <4 x i32> %b) {
; GISEL-LABEL: csh_v4i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT: v_and_b32_e32 v4, 31, v4
-; GISEL-NEXT: v_and_b32_e32 v5, 31, v5
-; GISEL-NEXT: v_and_b32_e32 v6, 31, v6
-; GISEL-NEXT: v_and_b32_e32 v7, 31, v7
; GISEL-NEXT: v_lshlrev_b32_e32 v8, v4, v0
; GISEL-NEXT: v_lshlrev_b32_e32 v9, v5, v1
; GISEL-NEXT: v_lshlrev_b32_e32 v10, v6, v2
diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
index 9f093cc7b5abf2..26a4ea9d8a4b6e 100644
--- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
@@ -230,49 +230,27 @@ entry:
}
define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
-; GFX67-SDAG-LABEL: clpeak_imad_pat_i16:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v0, v2
-; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_imad_pat_i16:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_imad_pat_i16:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v3, v0, v2
+; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_i16:
; GFX8-SDAG: ; %bb.0: ; %entry
@@ -337,11 +315,11 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -363,13 +341,13 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
@@ -400,13 +378,13 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
@@ -470,42 +448,40 @@ define <2 x i16> @clpeak_imad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v5, v3, 1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v4, v2, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
-; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2
+; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16:
@@ -682,46 +658,43 @@ define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v6, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v7, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v6, v3, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v7, v4, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v8, v5, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v0, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v6, v6, v3, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v1, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v7, v7, v4, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v3, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v9
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v6
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v2, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v5, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v4
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v10
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v7
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v11
+; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v8
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v8
-; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v3, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v8
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v3, v2
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_v3i16:
@@ -1063,19 +1036,15 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v10, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v10, v5, v1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v11, 16, v9
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9
; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v9, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v11, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v8, v4, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v11, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v9, v6, v2
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v11, v7, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
@@ -1085,60 +1054,60 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX67-GISEL-NEXT: v_or_b32_e32 v1, v1, v2
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v10
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v13, v2, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v10, v10, v5, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v12, v0, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v10
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v8
-; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v9
-; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v11
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
-; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
-; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v6
-; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v7
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
-; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v2
-; GFX67-GISEL-NEXT: v_or_b32_e32 v5, v5, v6
-; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7
-; GFX67-GISEL-NEXT: v_or_b32_e32 v6, v6, v7
-; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v8
-; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v9
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
-; GFX67-GISEL-NEXT: v_or_b32_e32 v7, v7, v8
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v15, v3, v7
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v11, v11, v7, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v3, v7, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v14, v1, v6
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v9, v9, v6, 1
+; GFX67-GISEL-NEXT: v_or_b32_e32 v8, v8, v10
+; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v11
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v6, 1
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v12
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v8
+; GFX67-GISEL-NEXT: v_or_b32_e32 v9, v9, v10
+; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v1, v2
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v8
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v13
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v5, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v14
+; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v9
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v9
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v15
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v6, v3
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v4, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v5
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v9, 16, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v8
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v1, v9
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v6
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v7
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v2, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v7
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v5
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v4, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_v4i16:
@@ -1403,47 +1372,26 @@ entry:
}
define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
-; GFX67-SDAG-LABEL: clpeak_umad_pat_i16:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v0, v1
-; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v3, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v3, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_umad_pat_i16:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_umad_pat_i16:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_mul_u32_u24_e32 v2, v0, v1
+; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v3, v2
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_mad_u32_u24 v1, v3, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_umad_pat_i16:
; GFX8-SDAG: ; %bb.0: ; %entry
@@ -1504,11 +1452,11 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1530,13 +1478,13 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -1567,13 +1515,13 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -1637,42 +1585,40 @@ define <2 x i16> @clpeak_umad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v5, v3, 1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v4, v2, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
-; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2
+; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16:
@@ -1849,46 +1795,43 @@ define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v6, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v7, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v6, v3, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v7, v4, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v8, v5, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v0, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v6, v6, v3, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v1, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v7, v7, v4, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v3, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v9
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v6
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v2, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v5, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v4
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v10
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v7
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v11
+; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v8
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v8
-; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v3, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v8
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v3, v2
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_umad_pat_v3i16:
@@ -2230,19 +2173,15 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v10, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v10, v5, v1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v11, 16, v9
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9
; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v9, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v11, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v8, v4, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v11, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v9, v6, v2
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v11, v7, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
@@ -2252,60 +2191,60 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX67-GISEL-NEXT: v_or_b32_e32 v1, v1, v2
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v10
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v13, v2, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v10, v10, v5, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v2, v5, 1
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v12, v0, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v8, v8, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v10
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v8
-; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v9
-; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v11
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
-; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
-; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v6
-; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v7
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
-; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v2
-; GFX67-GISEL-NEXT: v_or_b32_e32 v5, v5, v6
-; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
-; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7
-; GFX67-GISEL-NEXT: v_or_b32_e32 v6, v6, v7
-; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v8
-; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v9
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
-; GFX67-GISEL-NEXT: v_or_b32_e32 v7, v7, v8
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v15, v3, v7
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v11, v11, v7, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v3, v7, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v14, v1, v6
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v9, v9, v6, 1
+; GFX67-GISEL-NEXT: v_or_b32_e32 v8, v8, v10
+; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v11
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v6, 1
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v12
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v8
+; GFX67-GISEL-NEXT: v_or_b32_e32 v9, v9, v10
+; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v1, v2
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v8
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v13
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v5, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v14
+; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v9
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v9
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v15
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v6, v3
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v4, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v5
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v9, 16, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v8
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v1, v9
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v6
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v7
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v2, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v7
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v6
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v5
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v4, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_umad_pat_v4i16:
@@ -4282,49 +4221,27 @@ entry:
}
define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
-; GFX67-SDAG-LABEL: clpeak_imad_pat_i8:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v0, v2
-; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_imad_pat_i8:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_imad_pat_i8:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v3, v0, v2
+; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
+; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xff, v3
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_i8:
; GFX8-SDAG: ; %bb.0: ; %entry
@@ -4389,11 +4306,11 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4415,13 +4332,13 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
@@ -4452,13 +4369,13 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
@@ -4524,32 +4441,30 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v4, v2, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v5, v3, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v7
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v5
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v2, v1
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i8:
@@ -4655,20 +4570,18 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v4, v0, v2
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v5, v1, v3
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v4, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v5, v1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v4, 1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v3, v5, 1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v4, v0, 1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v5, v1, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0
+; GFX10-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3
+; GFX10-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v4
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v5
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i8:
@@ -4704,25 +4617,21 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v4, v0, v2
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v5, v1, v3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v4, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v5, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v4, 1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v3, v5, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0
+; GFX11-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v4, v0, 1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v5, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3
+; GFX11-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v4
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v5
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1200-SDAG-LABEL: clpeak_imad_pat_v2i8:
@@ -4766,25 +4675,21 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v4, v0, v2
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v5, v1, v3
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v4, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v5, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v4, 1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v3, v5, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0
+; GFX1200-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v4, v0, 1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v5, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3
+; GFX1200-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v4
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v5
; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
%y18 = add <2 x i8> %x, <i8 1, i8 1>
@@ -7600,81 +7505,43 @@ entry:
}
define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
-; GFX67-SDAG-LABEL: clpeak_imad_pat_i16_x2:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v0, v2
-; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_imad_pat_i16_x2:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_imad_pat_i16_x2:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v3, v0, v2
+; GFX67-NEXT: v_add_i32_e32 v1, vcc, 1, v1
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v2
+; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v2
+; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v1, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v2
+; GFX67-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_i16_x2:
; GFX8-SDAG: ; %bb.0: ; %entry
@@ -7767,19 +7634,19 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -7807,23 +7674,23 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
@@ -7860,23 +7727,23 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
@@ -7902,79 +7769,42 @@ entry:
}
define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
-; GFX67-SDAG-LABEL: clpeak_umad_pat_i16_x2:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v0, v1
-; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v3, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v3, v2, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v2, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: clpeak_umad_pat_i16_x2:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: clpeak_umad_pat_i16_x2:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_mul_u32_u24_e32 v2, v0, v1
+; GFX67-NEXT: v_add_i32_e32 v3, vcc, 1, v1
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v3, v2
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v1
+; GFX67-NEXT: v_mad_u32_u24 v2, v3, v2, 1
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v2, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v1
+; GFX67-NEXT: v_mad_u32_u24 v2, v2, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v2, v3
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v4
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_mad_u32_u24 v1, v2, v3, 1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_umad_pat_i16_x2:
; GFX8-SDAG: ; %bb.0: ; %entry
@@ -8063,19 +7893,19 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX10-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX10-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -8103,23 +7933,23 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -8156,23 +7986,23 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -8268,10 +8098,8 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
@@ -8279,9 +8107,9 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v5, v3, 1
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v4, v2, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
@@ -8290,64 +8118,60 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v1, v5, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v0, v4, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v5, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
-; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v3, v5, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v2, v4, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v3, v5, 1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v2, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
+; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2
+; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
@@ -8591,10 +8415,8 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v5, v3, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v4, v2, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
@@ -8602,9 +8424,9 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v5, v3, 1
; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v4, v2, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
@@ -8613,64 +8435,60 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v1, v5, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v0, v4, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v5, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2
; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
-; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v3, v5, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v2, v4, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v5, v3, v5, 1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v4, v2, v4, 1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
+; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v1, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, 1
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v0, v2
+; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, 1
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v6
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
+; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
@@ -8908,24 +8726,14 @@ entry:
}
define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
-; GFX67-SDAG-LABEL: multi_use_mul_mad_i16_var:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v1, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v4, v1, v3
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: multi_use_mul_mad_i16_var:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v0, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v2
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: multi_use_mul_mad_i16_var:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_mad_u32_u24 v0, v4, v1, v2
+; GFX67-NEXT: v_mad_u32_u24 v1, v4, v1, v3
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: multi_use_mul_mad_i16_var:
; GFX8-SDAG: ; %bb.0: ; %entry
@@ -8973,10 +8781,9 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
; GFX10-GISEL-LABEL: multi_use_mul_mad_i16_var:
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v0, v2
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, v3
-; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
+; GFX10-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
+; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -8992,12 +8799,10 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
; GFX11-GISEL-LABEL: multi_use_mul_mad_i16_var:
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v0, v2
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, v3
-; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
+; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -9021,12 +8826,10 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v0, v2
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, v3
-; GFX1200-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
+; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
+; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX1200-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -9108,29 +8911,17 @@ entry:
}
define i16 @other_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z, ptr addrspace(3) %ptr) {
-; GFX67-SDAG-LABEL: other_use_mul_mad_i16_var:
-; GFX67-SDAG: ; %bb.0: ; %entry
-; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, v2
-; GFX67-SDAG-NEXT: s_mov_b32 m0, -1
-; GFX67-SDAG-NEXT: ds_write_b16 v3, v4
-; GFX67-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX67-GISEL-LABEL: other_use_mul_mad_i16_var:
-; GFX67-GISEL: ; %bb.0: ; %entry
-; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v0, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v2
-; GFX67-GISEL-NEXT: s_mov_b32 m0, -1
-; GFX67-GISEL-NEXT: ds_write_b16 v3, v1
-; GFX67-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX67-LABEL: other_use_mul_mad_i16_var:
+; GFX67: ; %bb.0: ; %entry
+; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-NEXT: v_mul_u32_u24_e32 v4, v0, v1
+; GFX67-NEXT: v_mad_u32_u24 v0, v0, v1, v2
+; GFX67-NEXT: s_mov_b32 m0, -1
+; GFX67-NEXT: ds_write_b16 v3, v4
+; GFX67-NEXT: s_waitcnt lgkmcnt(0)
+; GFX67-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: other_use_mul_mad_i16_var:
; GFX8: ; %bb.0: ; %entry
@@ -9151,69 +8942,36 @@ define i16 @other_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z, ptr addrspace(3) %
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: other_use_mul_mad_i16_var:
-; GFX10-SDAG: ; %bb.0: ; %entry
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_mul_lo_u16 v4, v0, v1
-; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2
-; GFX10-SDAG-NEXT: ds_write_b16 v3, v4
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: other_use_mul_mad_i16_var:
-; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v0, v1
-; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v1, v2
-; GFX10-GISEL-NEXT: ds_write_b16 v3, v1
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: other_use_mul_mad_i16_var:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_mul_lo_u16 v4, v0, v1
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2
-; GFX11-SDAG-NEXT: ds_store_b16 v3, v4
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: other_use_mul_mad_i16_var:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v0, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v1, v2
-; GFX11-GISEL-NEXT: ds_store_b16 v3, v1
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: other_use_mul_mad_i16_var:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_mul_lo_u16 v4, v0, v1
+; GFX10-NEXT: v_mad_u16 v0, v0, v1, v2
+; GFX10-NEXT: ds_write_b16 v3, v4
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1200-SDAG-LABEL: other_use_mul_mad_i16_var:
-; GFX1200-SDAG: ; %bb.0: ; %entry
-; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX1200-SDAG-NEXT: v_mul_lo_u16 v4, v0, v1
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2
-; GFX1200-SDAG-NEXT: ds_store_b16 v3, v4
-; GFX1200-SDAG-NEXT: s_wait_dscnt 0x0
-; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: other_use_mul_mad_i16_var:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_lo_u16 v4, v0, v1
+; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2
+; GFX11-NEXT: ds_store_b16 v3, v4
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1200-GISEL-LABEL: other_use_mul_mad_i16_var:
-; GFX1200-GISEL: ; %bb.0: ; %entry
-; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v0, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v1, v2
-; GFX1200-GISEL-NEXT: ds_store_b16 v3, v1
-; GFX1200-GISEL-NEXT: s_wait_dscnt 0x0
-; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1200-LABEL: other_use_mul_mad_i16_var:
+; GFX1200: ; %bb.0: ; %entry
+; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-NEXT: s_wait_expcnt 0x0
+; GFX1200-NEXT: s_wait_samplecnt 0x0
+; GFX1200-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-NEXT: s_wait_kmcnt 0x0
+; GFX1200-NEXT: v_mul_lo_u16 v4, v0, v1
+; GFX1200-NEXT: v_mad_u16 v0, v0, v1, v2
+; GFX1200-NEXT: ds_store_b16 v3, v4
+; GFX1200-NEXT: s_wait_dscnt 0x0
+; GFX1200-NEXT: s_setpc_b64 s[30:31]
entry:
%mul = mul i16 %x, %y
%add0 = add i16 %mul, %z
@@ -9246,16 +9004,14 @@ define <4 x i16> @multi_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i
; GFX67-GISEL-LABEL: multi_use_mul_mad_v2i16_var:
; GFX67-GISEL: ; %bb.0: ; %entry
; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v0, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v0, v1
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v3, v5
-; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
-; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
+; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v8, v2, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v9, v3, v5
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v2, v8, v2, v6
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v3, v9, v3, v7
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: multi_use_mul_mad_v2i16_var:
@@ -9366,20 +9122,20 @@ define <2 x i16> @other_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i
; GFX67-GISEL-LABEL: other_use_mul_mad_v2i16_var:
; GFX67-GISEL: ; %bb.0: ; %entry
; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3
-; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
-; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v1
-; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
-; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v1, v3
+; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v0, v2
+; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
+; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
+; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
+; GFX67-GISEL-NEXT: v_or_b32_e32 v7, v7, v8
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v0, v0, v2, v4
+; GFX67-GISEL-NEXT: v_mad_u32_u24 v1, v1, v3, v5
; GFX67-GISEL-NEXT: s_mov_b32 m0, -1
-; GFX67-GISEL-NEXT: ds_write_b32 v6, v2
+; GFX67-GISEL-NEXT: ds_write_b32 v6, v7
; GFX67-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -9532,29 +9288,15 @@ define i64 @mul_u24_add64(i32 %x, i32 %y, i64 %z) {
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1200-SDAG-LABEL: mul_u24_add64:
-; GFX1200-SDAG: ; %bb.0:
-; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX1200-SDAG-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3]
-; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-GISEL-LABEL: mul_u24_add64:
-; GFX1200-GISEL: ; %bb.0:
-; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v1
-; GFX1200-GISEL-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2
-; GFX1200-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1200-LABEL: mul_u24_add64:
+; GFX1200: ; %bb.0:
+; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-NEXT: s_wait_expcnt 0x0
+; GFX1200-NEXT: s_wait_samplecnt 0x0
+; GFX1200-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-NEXT: s_wait_kmcnt 0x0
+; GFX1200-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3]
+; GFX1200-NEXT: s_setpc_b64 s[30:31]
%mul = call i64 @llvm.amdgcn.mul.u24.i64(i32 %x, i32 %y)
%add = add i64 %mul, %z
ret i64 %add
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 83599e789e10b9..84f23985b64213 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -1350,13 +1350,10 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
// Handle the case where the MVT/register class is omitted in the dest pattern
// but MVT exists in the source pattern.
- if (isa<UnsetInit>(DstChild.getLeafValue())) {
- for (const TreePatternNode &SrcChild : Src.children()) {
- if (SrcChild.getName() == DstChild.getName()) {
- DstMIBuilder.addRenderer<CopyRenderer>(SrcChild.getName());
- return InsertPt;
- }
- }
+ if (isa<UnsetInit>(DstChild.getLeafValue()) &&
+ Rule.hasOperand(DstChild.getName())) {
+ DstMIBuilder.addRenderer<CopyRenderer>(DstChild.getName());
+ return InsertPt;
}
return failedImport("Dst pattern child is an unsupported kind");
}
More information about the llvm-commits
mailing list