[llvm] [AMDGPU] Add gfx1250 V_ADD_{MIN|MAX}_{U|I}32 instructions (PR #151379)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 30 11:56:49 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

<details>
<summary>Changes</summary>



---

Patch is 40.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151379.diff


13 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+4) 
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+3) 
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+18) 
- (modified) llvm/test/CodeGen/AMDGPU/add-max.ll (+29-33) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s (+60) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s (+60) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s (+48) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s (+48) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s (+32) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s (+32) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt (+60) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt (+36) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt (+24) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 071c9406a1517..8a0c4ac6ed8d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2576,6 +2576,10 @@ def HasFmaakFmamkF64Insts :
   Predicate<"Subtarget->hasFmaakFmamkF64Insts()">,
   AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
 
+def HasAddMinMaxInsts :
+  Predicate<"Subtarget->hasAddMinMaxInsts()">,
+  AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
+
 def HasPkAddMinMaxInsts :
   Predicate<"Subtarget->hasPkAddMinMaxInsts()">,
   AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index fba1c5a0f05e1..bdd900d748531 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1535,6 +1535,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   // \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.
   bool hasIntMinMax64() const { return GFX1250Insts; }
 
+  // \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.
+  bool hasAddMinMaxInsts() const { return GFX1250Insts; }
+
   // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
   bool hasPkAddMinMaxInsts() const { return GFX1250Insts; }
 
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 5586dd872fef5..ad7057b657036 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -746,6 +746,13 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
   defm V_MAXIMUM3_F16 : VOP3Inst_t16 <"v_maximum3_f16", VOP_F16_F16_F16_F16, AMDGPUfmaximum3>;
 } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
 
+let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in {
+  defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP_I32_I32_I32_I32>;
+  defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP_I32_I32_I32_I32>;
+  defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP_I32_I32_I32_I32>;
+  defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP_I32_I32_I32_I32>;
+}
+
 defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
 defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
 
@@ -885,6 +892,13 @@ def : GCNPat<
   (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
 >;
 
+let SubtargetPredicate = HasAddMinMaxInsts in {
+def : ThreeOp_i32_Pats<add, smax, V_ADD_MAX_I32_e64>;
+def : ThreeOp_i32_Pats<add, umax, V_ADD_MAX_U32_e64>;
+def : ThreeOp_i32_Pats<add, smin, V_ADD_MIN_I32_e64>;
+def : ThreeOp_i32_Pats<add, umin, V_ADD_MIN_U32_e64>;
+}
+
 def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>;
 def : VOPBinOpClampPat<ssubsat, V_SUB_I32_e64, i32>;
 
@@ -1821,6 +1835,10 @@ defm V_MIN_U64 : VOP3Only_Realtriple_gfx1250<0x318>;
 defm V_MAX_U64 : VOP3Only_Realtriple_gfx1250<0x319>;
 defm V_MIN_I64 : VOP3Only_Realtriple_gfx1250<0x31a>;
 defm V_MAX_I64 : VOP3Only_Realtriple_gfx1250<0x31b>;
+defm V_ADD_MAX_I32 : VOP3Only_Realtriple_gfx1250<0x25e>;
+defm V_ADD_MAX_U32 : VOP3Only_Realtriple_gfx1250<0x25f>;
+defm V_ADD_MIN_I32 : VOP3Only_Realtriple_gfx1250<0x260>;
+defm V_ADD_MIN_U32 : VOP3Only_Realtriple_gfx1250<0x261>;
 
 defm V_CVT_PK_FP8_F32  : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x369, "v_cvt_pk_fp8_f32">;
 defm V_CVT_PK_BF8_F32  : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x36a, "v_cvt_pk_bf8_f32">;
diff --git a/llvm/test/CodeGen/AMDGPU/add-max.ll b/llvm/test/CodeGen/AMDGPU/add-max.ll
index b9925060aaec4..00c66563572ea 100644
--- a/llvm/test/CodeGen/AMDGPU/add-max.ll
+++ b/llvm/test/CodeGen/AMDGPU/add-max.ll
@@ -5,9 +5,7 @@
 define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
 ; GCN-LABEL: add_max_u32_vvv:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    v_add_nc_u32_e32 v0, v0, v1
-; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT:    v_max_u32_e32 v0, v0, v2
+; GCN-NEXT:    v_add_max_u32_e64 v0, v0, v1, v2
 ; GCN-NEXT:    ; return to shader part epilog
   %add = add i32 %a, %b
   %max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -18,9 +16,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
 define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
 ; GCN-LABEL: add_max_u32_svv:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    v_add_nc_u32_e32 v0, s0, v0
-; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT:    v_max_u32_e32 v0, v0, v1
+; GCN-NEXT:    v_add_max_u32_e64 v0, s0, v0, v1
 ; GCN-NEXT:    ; return to shader part epilog
   %add = add i32 %a, %b
   %max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -29,12 +25,17 @@ define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
 }
 
 define amdgpu_ps float @add_max_u32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
-; GCN-LABEL: add_max_u32_ssv:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_add_co_i32 s0, s0, s1
-; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GCN-NEXT:    v_max_u32_e32 v0, s0, v0
-; GCN-NEXT:    ; return to shader part epilog
+; SDAG-LABEL: add_max_u32_ssv:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    v_add_max_u32_e64 v0, s0, s1, v0
+; SDAG-NEXT:    ; return to shader part epilog
+;
+; GISEL-LABEL: add_max_u32_ssv:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_add_co_i32 s0, s0, s1
+; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GISEL-NEXT:    v_max_u32_e32 v0, s0, v0
+; GISEL-NEXT:    ; return to shader part epilog
   %add = add i32 %a, %b
   %max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
   %ret = bitcast i32 %max to float
@@ -58,9 +59,7 @@ define amdgpu_ps float @add_max_u32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c
 define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
 ; GCN-LABEL: add_max_u32_vsi:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    v_add_nc_u32_e32 v0, s0, v0
-; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT:    v_max_u32_e32 v0, 4, v0
+; GCN-NEXT:    v_add_max_u32_e64 v0, v0, s0, 4
 ; GCN-NEXT:    ; return to shader part epilog
   %add = add i32 %a, %b
   %max = call i32 @llvm.umax.i32(i32 %add, i32 4)
@@ -71,9 +70,7 @@ define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
 define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
 ; GCN-LABEL: add_max_u32_svl:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    v_add_nc_u32_e32 v0, s0, v0
-; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT:    v_max_u32_e32 v0, 0x64, v0
+; GCN-NEXT:    v_add_max_u32_e64 v0, s0, v0, 0x64
 ; GCN-NEXT:    ; return to shader part epilog
   %add = add i32 %a, %b
   %max = call i32 @llvm.umax.i32(i32 %add, i32 100)
@@ -82,12 +79,17 @@ define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
 }
 
 define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
-; GCN-LABEL: add_max_u32_slv:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_addk_co_i32 s0, 0x64
-; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GCN-NEXT:    v_max_u32_e32 v0, s0, v0
-; GCN-NEXT:    ; return to shader part epilog
+; SDAG-LABEL: add_max_u32_slv:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    v_add_max_u32_e64 v0, 0x64, s0, v0
+; SDAG-NEXT:    ; return to shader part epilog
+;
+; GISEL-LABEL: add_max_u32_slv:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_addk_co_i32 s0, 0x64
+; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GISEL-NEXT:    v_max_u32_e32 v0, s0, v0
+; GISEL-NEXT:    ; return to shader part epilog
   %add = add i32 %a, 100
   %max = call i32 @llvm.umax.i32(i32 %add, i32 %b)
   %ret = bitcast i32 %max to float
@@ -97,9 +99,7 @@ define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
 define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
 ; GCN-LABEL: add_max_i32_vvv:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    v_add_nc_u32_e32 v0, v0, v1
-; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT:    v_max_i32_e32 v0, v0, v2
+; GCN-NEXT:    v_add_max_i32_e64 v0, v0, v1, v2
 ; GCN-NEXT:    ; return to shader part epilog
   %add = add i32 %a, %b
   %max = call i32 @llvm.smax.i32(i32 %add, i32 %c)
@@ -110,9 +110,7 @@ define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
 define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
 ; GCN-LABEL: add_min_u32_vvv:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    v_add_nc_u32_e32 v0, v0, v1
-; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT:    v_min_u32_e32 v0, v0, v2
+; GCN-NEXT:    v_add_min_u32_e64 v0, v0, v1, v2
 ; GCN-NEXT:    ; return to shader part epilog
   %add = add i32 %a, %b
   %max = call i32 @llvm.umin.i32(i32 %add, i32 %c)
@@ -123,9 +121,7 @@ define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
 define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) {
 ; GCN-LABEL: add_min_i32_vvv:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    v_add_nc_u32_e32 v0, v0, v1
-; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GCN-NEXT:    v_min_i32_e32 v0, v0, v2
+; GCN-NEXT:    v_add_min_i32_e64 v0, v0, v1, v2
 ; GCN-NEXT:    ; return to shader part epilog
   %add = add i32 %a, %b
   %max = call i32 @llvm.smin.i32(i32 %add, i32 %c)
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
index 04c55eedc18d4..a2508057b16af 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -217,6 +217,66 @@ v_mad_nc_i64_i32 v[2:3], v4, v7, 12345
 v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
 // GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
 
+v_add_min_i32 v2, s4, v7, v8
+// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8        ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_min_i32 v2, v4, 0, 1
+// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1          ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_min_i32 v2, v4, 3, s2
+// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2         ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_min_i32 v2, s4, 4, v2
+// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2         ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_min_i32 v2, v4, v7, 12345
+// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039    ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_i32 v2, s4, v7, v8
+// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8        ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_max_i32 v2, v4, 0, 1
+// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1          ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_max_i32 v2, v4, 3, s2
+// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2         ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_max_i32 v2, s4, 4, v2
+// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2         ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_max_i32 v2, v4, v7, 12345
+// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039    ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_u32 v2, s4, v7, v8
+// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8        ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_min_u32 v2, v4, 0, 1
+// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1          ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_min_u32 v2, v4, 3, s2
+// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2         ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_min_u32 v2, s4, 4, v2
+// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2         ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_min_u32 v2, v4, v7, 12345
+// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039    ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_u32 v2, s4, v7, v8
+// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8        ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_max_u32 v2, v4, 0, 1
+// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1          ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_max_u32 v2, v4, 3, s2
+// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2         ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_max_u32 v2, s4, 4, v2
+// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2         ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_max_u32 v2, v4, v7, 12345
+// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039    ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
 v_cvt_pk_bf16_f32 v5, v1, v2
 // GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2            ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
index ebfeb3f74d752..52a18cd82010c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -217,6 +217,66 @@ v_mad_nc_i64_i32 v[2:3], v4, v7, 12345
 v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
 // GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
 
+v_add_min_i32 v2, s4, v7, v8
+// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8        ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_min_i32 v2, v4, 0, 1
+// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1          ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_min_i32 v2, v4, 3, s2
+// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2         ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_min_i32 v2, s4, 4, v2
+// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2         ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_min_i32 v2, v4, v7, 12345
+// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039    ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_i32 v2, s4, v7, v8
+// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8        ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_max_i32 v2, v4, 0, 1
+// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1          ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_max_i32 v2, v4, 3, s2
+// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2         ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_max_i32 v2, s4, 4, v2
+// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2         ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_max_i32 v2, v4, v7, 12345
+// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039    ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_u32 v2, s4, v7, v8
+// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8        ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_min_u32 v2, v4, 0, 1
+// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1          ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_min_u32 v2, v4, 3, s2
+// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2         ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_min_u32 v2, s4, 4, v2
+// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2         ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_min_u32 v2, v4, v7, 12345
+// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039    ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_u32 v2, s4, v7, v8
+// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8        ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
+
+v_add_max_u32 v2, v4, 0, 1
+// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1          ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
+
+v_add_max_u32 v2, v4, 3, s2
+// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2         ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
+
+v_add_max_u32 v2, s4, 4, v2
+// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2         ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
+
+v_add_max_u32 v2, v4, v7, 12345
+// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039    ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
 v_cvt_pk_bf16_f32 v5, v1, v2
 // GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2            ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
index d9c7645543db2..934186fb4fde4 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s
@@ -146,6 +146,54 @@ v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:102 op_sel:[1,1,1,1] quad_perm:[0,1,2
 // GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x66 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x33,0xd6,0xfa,0x04,0x0e,0xcc,0x01,0xe4,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_add_min_i32 v2, v4, v7, v8 quad_perm:[1,2,3,1]
+// GFX1250: v_add_min_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x60,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_add_min_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x60,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_add_min_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x60,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_i32 v2, v4, v7, v8 quad_perm:[3,2,1,0]
+// GFX1250: v_add_max_i32_e64_dpp v2, v4, v7, v8 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5e,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_i32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_add_max_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x5e,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_i32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_add_max_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5e,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_u32 v2, v4, v7, v8 quad_perm:[3,2,1,0]
+// GFX1250: v_add_min_u32_e64_dpp v2, v4, v7, v8 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x61,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_u32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_add_min_u32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x61,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_min_u32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_add_min_u32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x61,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_u32 v2, v4, v7, v8 quad_perm:[3,2,1,0]
+// GFX1250: v_add_max_u32_e64_dpp v2, v4, v7, v8 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5f,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_u32 v2, v4, v7, v8 row_share:3 fi:1
+// GFX1250: v_add_max_u32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x5f,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_add_max_u32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_add_max_u32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x5f,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
 // GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/151379


More information about the llvm-commits mailing list