[llvm] [AMDGPU] Disable V_MAD_U64_U32/V_MAD_I64_I32 workaround for GFX11.5 (PR #79460)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 25 07:25:54 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

Author: Jay Foad (jayfoad)

<details>
<summary>Changes</summary>

The hardware bug only affects GFX11.0.x.


---

Patch is 36.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/79460.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir (+38-19) 
- (modified) llvm/test/CodeGen/AMDGPU/mad_64_32.ll (+323-164) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 250e3e350c02e9..2a40129661102b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1445,13 +1445,13 @@ def FeatureISAVersion11_Common : FeatureSet<
    FeatureFlatAtomicFaddF32Inst,
    FeatureImageInsts,
    FeaturePackedTID,
-   FeatureVcmpxPermlaneHazard,
-   FeatureMADIntraFwdBug]>;
+   FeatureVcmpxPermlaneHazard]>;
 
 def FeatureISAVersion11_0_Common : FeatureSet<
   !listconcat(FeatureISAVersion11_Common.Features,
     [FeatureMSAALoadDstSelBug,
-     FeatureVALUTransUseHazard])>;
+     FeatureVALUTransUseHazard,
+     FeatureMADIntraFwdBug])>;
 
 def FeatureISAVersion11_0_0 : FeatureSet<
   !listconcat(FeatureISAVersion11_0_Common.Features,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir
index d07434dcd38c13..b886af5bf73102 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir
@@ -1,7 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX11 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX12 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefixes=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefixes=GFX11,GFX1100 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefixes=GFX11,GFX1150 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefixes=GFX12 %s
 
 ---
 name: mad_u64_u32_vvv
@@ -20,14 +21,23 @@ body: |
     ; GFX10-NEXT: [[V_MAD_U64_U32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
     ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_e64_]], implicit [[V_MAD_U64_U32_e64_1]]
     ;
-    ; GFX11-LABEL: name: mad_u64_u32_vvv
-    ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3
-    ; GFX11-NEXT: [[V_MAD_U64_U32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_gfx11_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_gfx11_e64_]], implicit [[V_MAD_U64_U32_gfx11_e64_1]]
+    ; GFX1100-LABEL: name: mad_u64_u32_vvv
+    ; GFX1100: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; GFX1100-NEXT: {{  $}}
+    ; GFX1100-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX1100-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX1100-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3
+    ; GFX1100-NEXT: [[V_MAD_U64_U32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_gfx11_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX1100-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_gfx11_e64_]], implicit [[V_MAD_U64_U32_gfx11_e64_1]]
+    ;
+    ; GFX1150-LABEL: name: mad_u64_u32_vvv
+    ; GFX1150: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; GFX1150-NEXT: {{  $}}
+    ; GFX1150-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3
+    ; GFX1150-NEXT: [[V_MAD_U64_U32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX1150-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_e64_]], implicit [[V_MAD_U64_U32_e64_1]]
     ;
     ; GFX12-LABEL: name: mad_u64_u32_vvv
     ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -63,14 +73,23 @@ body: |
     ; GFX10-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
     ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_e64_]], implicit [[V_MAD_I64_I32_e64_1]]
     ;
-    ; GFX11-LABEL: name: mad_i64_i32_vvv
-    ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3
-    ; GFX11-NEXT: [[V_MAD_I64_I32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_gfx11_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_gfx11_e64_]], implicit [[V_MAD_I64_I32_gfx11_e64_1]]
+    ; GFX1100-LABEL: name: mad_i64_i32_vvv
+    ; GFX1100: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; GFX1100-NEXT: {{  $}}
+    ; GFX1100-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX1100-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX1100-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3
+    ; GFX1100-NEXT: [[V_MAD_I64_I32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_gfx11_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX1100-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_gfx11_e64_]], implicit [[V_MAD_I64_I32_gfx11_e64_1]]
+    ;
+    ; GFX1150-LABEL: name: mad_i64_i32_vvv
+    ; GFX1150: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; GFX1150-NEXT: {{  $}}
+    ; GFX1150-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3
+    ; GFX1150-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX1150-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_e64_]], implicit [[V_MAD_I64_I32_e64_1]]
     ;
     ; GFX12-LABEL: name: mad_i64_i32_vvv
     ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
index 450ce590691426..14bcc4f994f890 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
@@ -2,7 +2,8 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1100 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1150 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
 
 ; On GFX11, ensure vdst and src2 do not partially overlap. Full overlap is ok.
@@ -29,13 +30,19 @@ define i64 @mad_i64_i32_sextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 ; GFX9-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mad_i64_i32_sextops:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX1100-LABEL: mad_i64_i32_sextops:
+; GFX1100:       ; %bb.0:
+; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
+; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1150-LABEL: mad_i64_i32_sextops:
+; GFX1150:       ; %bb.0:
+; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1150-NEXT:    v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3]
+; GFX1150-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: mad_i64_i32_sextops:
 ; GFX12:       ; %bb.0:
@@ -75,13 +82,19 @@ define i64 @mad_i64_i32_sextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 ; GFX9-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mad_i64_i32_sextops_commute:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX1100-LABEL: mad_i64_i32_sextops_commute:
+; GFX1100:       ; %bb.0:
+; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
+; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1150-LABEL: mad_i64_i32_sextops_commute:
+; GFX1150:       ; %bb.0:
+; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1150-NEXT:    v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3]
+; GFX1150-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: mad_i64_i32_sextops_commute:
 ; GFX12:       ; %bb.0:
@@ -121,13 +134,19 @@ define i64 @mad_u64_u32_zextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 ; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mad_u64_u32_zextops:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX1100-LABEL: mad_u64_u32_zextops:
+; GFX1100:       ; %bb.0:
+; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
+; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1150-LABEL: mad_u64_u32_zextops:
+; GFX1150:       ; %bb.0:
+; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1150-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3]
+; GFX1150-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: mad_u64_u32_zextops:
 ; GFX12:       ; %bb.0:
@@ -167,13 +186,19 @@ define i64 @mad_u64_u32_zextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 ; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mad_u64_u32_zextops_commute:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX1100-LABEL: mad_u64_u32_zextops_commute:
+; GFX1100:       ; %bb.0:
+; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
+; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1150-LABEL: mad_u64_u32_zextops_commute:
+; GFX1150:       ; %bb.0:
+; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1150-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3]
+; GFX1150-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: mad_u64_u32_zextops_commute:
 ; GFX12:       ; %bb.0:
@@ -277,35 +302,65 @@ define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 {
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v8, v5, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mad_i64_i32_sextops_i32_i128:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mad_u64_u32 v[6:7], null, v0, v1, 0
-; GFX11-NEXT:    v_mov_b32_e32 v8, 0
-; GFX11-NEXT:    v_ashrrev_i32_e32 v14, 31, v0
-; GFX11-NEXT:    v_ashrrev_i32_e32 v15, 31, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_mad_u64_u32 v[9:10], null, v14, v1, v[7:8]
-; GFX11-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_mad_u64_u32 v[7:8], null, v0, v15, v[9:10]
-; GFX11-NEXT:    v_mov_b32_e32 v10, v8
-; GFX11-NEXT:    v_mad_i64_i32 v[8:9], null, v1, v14, 0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_add_co_u32 v10, s0, v11, v10
-; GFX11-NEXT:    v_add_co_ci_u32_e64 v11, null, 0, 0, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_mad_i64_i32 v[12:13], null, v15, v0, v[8:9]
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v14, v15, v[10:11]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_add_co_u32 v8, vcc_lo, v0, v12
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v1, v13, vcc_lo
-; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v6, v2
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v7, v3, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v8, v4, vcc_lo
-; GFX11-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX1100-LABEL: mad_i64_i32_sextops_i32_i128:
+; GFX1100:       ; %bb.0:
+; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT:    v_mad_u64_u32 v[6:7], null, v0, v1, 0
+; GFX1100-NEXT:    v_mov_b32_e32 v8, 0
+; GFX1100-NEXT:    v_ashrrev_i32_e32 v14, 31, v0
+; GFX1100-NEXT:    v_ashrrev_i32_e32 v15, 31, v1
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT:    v_mad_u64_u32 v[9:10], null, v14, v1, v[7:8]
+; GFX1100-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v8
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT:    v_mad_u64_u32 v[7:8], null, v0, v15, v[9:10]
+; GFX1100-NEXT:    v_mov_b32_e32 v10, v8
+; GFX1100-NEXT:    v_mad_i64_i32 v[8:9], null, v1, v14, 0
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT:    v_add_co_u32 v10, s0, v11, v10
+; GFX1100-NEXT:    v_add_co_ci_u32_e64 v11, null, 0, 0, s0
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-NEXT:    v_mad_i64_i32 v[12:13], null, v15, v0, v[8:9]
+; GFX1100-NEXT:    v_mad_u64_u32 v[0:1], null, v14, v15, v[10:11]
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1100-NEXT:    v_add_co_u32 v8, vcc_lo, v0, v12
+; GFX1100-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v1, v13, vcc_lo
+; GFX1100-NEXT:    v_add_co_u32 v0, vcc_lo, v6, v2
+; GFX1100-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v7, v3, vcc_lo
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1100-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v8, v4, vcc_lo
+; GFX1100-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo
+; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1150-LABEL: mad_i64_i32_sextops_i32_i128:
+; GFX1150:       ; %bb.0:
+; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1150-NEXT:    v_mad_u64_u32 v[6:7], null, v0, v1, 0
+; GFX1150-NEXT:    v_mov_b32_e32 v8, 0
+; GFX1150-NEXT:    v_ashrrev_i32_e32 v12, 31, v0
+; GFX1150-NEXT:    v_ashrrev_i32_e32 v13, 31, v1
+; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1150-NEXT:    v_mad_u64_u32 v[9:10], null, v12, v1, v[7:8]
+; GFX1150-NEXT:    v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v8
+; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1150-NEXT:    v_mad_u64_u32 v[7:8], null, v0, v13, v[9:10]
+; GFX1150-NEXT:    v_mov_b32_e32 v10, v8
+; GFX1150-NEXT:    v_mad_i64_i32 v[8:9], null, v1, v12, 0
+; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1150-NEXT:    v_add_co_u32 v10, s0, v11, v10
+; GFX1150-NEXT:    v_add_co_ci_u32_e64 v11, null, 0, 0, s0
+; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1150-NEXT:    v_mad_i64_i32 v[0:1], null, v13, v0, v[8:9]
+; GFX1150-NEXT:    v_mad_u64_u32 v[8:9], null, v12, v13, v[10:11]
+; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1150-NEXT:    v_add_co_u32 v8, vcc_lo, v8, v0
+; GFX1150-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v9, v1, vcc_lo
+; GFX1150-NEXT:    v_add_co_u32 v0, vcc_lo, v6, v2
+; GFX1150-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v7, v3, vcc_lo
+; GFX1150-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1150-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v8, v4, vcc_lo
+; GFX1150-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo
+; GFX1150-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: mad_i64_i32_sextops_i32_i128:
 ; GFX12:       ; %bb.0:
@@ -369,13 +424,19 @@ define i63 @mad_i64_i32_sextops_i32_i63(i32 %arg0, i32 %arg1, i63 %arg2) #0 {
 ; GFX9-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mad_i64_i32_sextops_i32_i63:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX1100-LABEL: mad_i64_i32_sextops_i32_i63:
+; GFX1100:       ; %bb.0:
+; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
+; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1100-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
+; GFX1100-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1150-LABEL: mad_i64_i32_sextops_i32_i63:
+; GFX1150:       ; %bb.0:
+; GFX1150-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1150-NEXT:    v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3]
+; GFX1150-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: mad_i64_i32_sextops_i32_i63:
 ; GFX12:       ; %bb.0:
@@ -423,14 +484,23 @@ define i63 @mad_i64_i32_sextops_i31_i63(i31 %arg0, i31 %arg1, i63 %arg2) #0 {
 ; GFX9-NEXT:    v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mad_i64_i32_sextops_i31_i63:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_bfe_i32 v4, v1, 0, 31
-; GFX11-NEXT:    v_bfe_i32 v5, v0, 0, 31
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX1100-LABEL: mad_i64_i32_sextops_i31_i63...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/79460


More information about the llvm-commits mailing list