[llvm] [AMDGPU] Add clamp support to v_add_{max|min}_{i|u}32 (PR #164489)
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 21 13:23:32 PDT 2025
https://github.com/rampitec created https://github.com/llvm/llvm-project/pull/164489
None
>From ce2752aedcd7486efdf40d1e67bf4695a9b899bd Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Tue, 21 Oct 2025 13:21:38 -0700
Subject: [PATCH] [AMDGPU] Add clamp support to v_add_{max|min}_{i|u}32
---
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8 +--
llvm/test/CodeGen/AMDGPU/add-max.ll | 18 +++----
llvm/test/CodeGen/AMDGPU/bf16.ll | 26 +++++-----
llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s | 52 ++++++++++++-------
llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s | 52 ++++++++++++-------
.../Disassembler/AMDGPU/gfx1250_dasm_vop3.txt | 52 ++++++++++++-------
6 files changed, 122 insertions(+), 86 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 42ec8ba876a8f..7cce033b30141 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -775,10 +775,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in {
- defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP_I32_I32_I32_I32>;
+ defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
}
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
diff --git a/llvm/test/CodeGen/AMDGPU/add-max.ll b/llvm/test/CodeGen/AMDGPU/add-max.ll
index 00c66563572ea..b3a70577b6979 100644
--- a/llvm/test/CodeGen/AMDGPU/add-max.ll
+++ b/llvm/test/CodeGen/AMDGPU/add-max.ll
@@ -5,7 +5,7 @@
define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_u32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_u32_e64 v0, v0, v1, v2
+; GCN-NEXT: v_add_max_u32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -16,7 +16,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_u32_svv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, v1
+; GCN-NEXT: v_add_max_u32 v0, s0, v0, v1
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -27,7 +27,7 @@ define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_max_u32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
; SDAG-LABEL: add_max_u32_ssv:
; SDAG: ; %bb.0:
-; SDAG-NEXT: v_add_max_u32_e64 v0, s0, s1, v0
+; SDAG-NEXT: v_add_max_u32 v0, s0, s1, v0
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: add_max_u32_ssv:
@@ -59,7 +59,7 @@ define amdgpu_ps float @add_max_u32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c
define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
; GCN-LABEL: add_max_u32_vsi:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_u32_e64 v0, v0, s0, 4
+; GCN-NEXT: v_add_max_u32 v0, v0, s0, 4
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 4)
@@ -70,7 +70,7 @@ define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
; GCN-LABEL: add_max_u32_svl:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, 0x64
+; GCN-NEXT: v_add_max_u32 v0, s0, v0, 0x64
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 100)
@@ -81,7 +81,7 @@ define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
; SDAG-LABEL: add_max_u32_slv:
; SDAG: ; %bb.0:
-; SDAG-NEXT: v_add_max_u32_e64 v0, 0x64, s0, v0
+; SDAG-NEXT: v_add_max_u32 v0, 0x64, s0, v0
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: add_max_u32_slv:
@@ -99,7 +99,7 @@ define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_i32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_i32_e64 v0, v0, v1, v2
+; GCN-NEXT: v_add_max_i32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.smax.i32(i32 %add, i32 %c)
@@ -110,7 +110,7 @@ define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_min_u32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_min_u32_e64 v0, v0, v1, v2
+; GCN-NEXT: v_add_min_u32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umin.i32(i32 %add, i32 %c)
@@ -121,7 +121,7 @@ define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_min_i32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_min_i32_e64 v0, v0, v1, v2
+; GCN-NEXT: v_add_min_i32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.smin.i32(i32 %add, i32 %c)
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 7ee0015f15997..711d57baac15f 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -39137,7 +39137,7 @@ define bfloat @v_sitofp_i64_to_bf16(i64 %x) {
; GFX1250-NEXT: v_ashrrev_i32_e32 v2, 31, v2
; GFX1250-NEXT: v_add_nc_u32_e32 v2, 32, v2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_add_min_u32_e64 v2, v3, -1, v2
+; GFX1250-NEXT: v_add_min_u32 v2, v3, -1, v2
; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v2, v[0:1]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0
@@ -39487,8 +39487,8 @@ define <2 x bfloat> @v_sitofp_v2i64_to_v2bf16(<2 x i64> %x) {
; GFX1250-NEXT: v_dual_ashrrev_i32 v5, 31, v5 :: v_dual_ashrrev_i32 v4, 31, v4
; GFX1250-NEXT: v_dual_add_nc_u32 v5, 32, v5 :: v_dual_add_nc_u32 v4, 32, v4
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_add_min_u32_e64 v5, v7, -1, v5
-; GFX1250-NEXT: v_add_min_u32_e64 v4, v6, -1, v4
+; GFX1250-NEXT: v_add_min_u32 v5, v7, -1, v5
+; GFX1250-NEXT: v_add_min_u32 v4, v6, -1, v4
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v5, v[0:1]
; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v4, v[2:3]
@@ -39979,9 +39979,9 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX1250TRUE16-NEXT: v_dual_add_nc_u32 v7, 32, v7 :: v_dual_add_nc_u32 v6, 32, v6
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250TRUE16-NEXT: v_ashrrev_i32_e32 v8, 31, v8
-; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v7, v10, -1, v7
+; GFX1250TRUE16-NEXT: v_add_min_u32 v7, v10, -1, v7
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v6, v9, -1, v6
+; GFX1250TRUE16-NEXT: v_add_min_u32 v6, v9, -1, v6
; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[2:3], v7, v[2:3]
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[4:5], v6, v[4:5]
@@ -39991,7 +39991,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX1250TRUE16-NEXT: v_min_u32_e32 v4, 1, v4
; GFX1250TRUE16-NEXT: v_or_b32_e32 v2, v3, v2
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v8, v11, -1, v8
+; GFX1250TRUE16-NEXT: v_add_min_u32 v8, v11, -1, v8
; GFX1250TRUE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v4, v5, v4 bitop3:0x54
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2
@@ -40027,8 +40027,8 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX1250FAKE16-NEXT: v_dual_ashrrev_i32 v6, 31, v6 :: v_dual_ashrrev_i32 v7, 31, v7
; GFX1250FAKE16-NEXT: v_dual_add_nc_u32 v6, 32, v6 :: v_dual_add_nc_u32 v7, 32, v7
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v6, v10, -1, v6
-; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v7, v11, -1, v7
+; GFX1250FAKE16-NEXT: v_add_min_u32 v6, v10, -1, v6
+; GFX1250FAKE16-NEXT: v_add_min_u32 v7, v11, -1, v7
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
@@ -40038,7 +40038,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX1250FAKE16-NEXT: v_min_u32_e32 v0, 1, v0
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250FAKE16-NEXT: v_or_b32_e32 v2, v3, v2
-; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v8, v9, -1, v8
+; GFX1250FAKE16-NEXT: v_add_min_u32 v8, v9, -1, v8
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v2, v2
@@ -40656,18 +40656,18 @@ define <4 x bfloat> @v_sitofp_v4i64_to_v4bf16(<4 x i64> %x) {
; GFX1250-NEXT: v_dual_add_nc_u32 v9, 32, v9 :: v_dual_add_nc_u32 v8, 32, v8
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_dual_ashrrev_i32 v10, 31, v10 :: v_dual_bitop2_b32 v11, v0, v1 bitop3:0x14
-; GFX1250-NEXT: v_add_min_u32_e64 v9, v13, -1, v9
+; GFX1250-NEXT: v_add_min_u32 v9, v13, -1, v9
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_add_min_u32_e64 v8, v12, -1, v8
+; GFX1250-NEXT: v_add_min_u32 v8, v12, -1, v8
; GFX1250-NEXT: v_dual_ashrrev_i32 v11, 31, v11 :: v_dual_add_nc_u32 v10, 32, v10
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-NEXT: v_lshlrev_b64_e32 v[4:5], v9, v[4:5]
; GFX1250-NEXT: v_lshlrev_b64_e32 v[6:7], v8, v[6:7]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_add_nc_u32_e32 v11, 32, v11
-; GFX1250-NEXT: v_add_min_u32_e64 v10, v14, -1, v10
+; GFX1250-NEXT: v_add_min_u32 v10, v14, -1, v10
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_add_min_u32_e64 v11, v15, -1, v11
+; GFX1250-NEXT: v_add_min_u32 v11, v15, -1, v11
; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v10, v[2:3]
; GFX1250-NEXT: v_min_u32_e32 v6, 1, v6
; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
index d3b44eb788444..8160544ddea4d 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -218,64 +218,76 @@ v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
// GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_i32 v2, s4, v7, v8
-// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_i32 v2, v4, 0, 1
-// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
v_add_min_i32 v2, v4, 3, s2
-// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
v_add_min_i32 v2, s4, 4, v2
-// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
v_add_min_i32 v2, v4, v7, 12345
-// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_i32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04]
v_add_max_i32 v2, s4, v7, v8
-// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
v_add_max_i32 v2, v4, 0, 1
-// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
v_add_max_i32 v2, v4, 3, s2
-// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
v_add_max_i32 v2, s4, 4, v2
-// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
v_add_max_i32 v2, v4, v7, 12345
-// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_i32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_max_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5e,0xd6,0x01,0x05,0x0e,0x04]
v_add_min_u32 v2, s4, v7, v8
-// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_u32 v2, v4, 0, 1
-// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
v_add_min_u32 v2, v4, 3, s2
-// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
v_add_min_u32 v2, s4, 4, v2
-// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
v_add_min_u32 v2, v4, v7, 12345
-// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_u32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04]
v_add_max_u32 v2, s4, v7, v8
-// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
v_add_max_u32 v2, v4, 0, 1
-// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
v_add_max_u32 v2, v4, 3, s2
-// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
v_add_max_u32 v2, s4, 4, v2
-// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
v_add_max_u32 v2, v4, v7, 12345
-// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_u32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04]
v_cvt_pk_bf16_f32 v5, v1, v2
// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
index 98d07ac1ece27..d913bd2db504b 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -218,64 +218,76 @@ v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
// GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_i32 v2, s4, v7, v8
-// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_i32 v2, v4, 0, 1
-// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
v_add_min_i32 v2, v4, 3, s2
-// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
v_add_min_i32 v2, s4, 4, v2
-// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
v_add_min_i32 v2, v4, v7, 12345
-// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_i32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04]
v_add_max_i32 v2, s4, v7, v8
-// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
v_add_max_i32 v2, v4, 0, 1
-// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
v_add_max_i32 v2, v4, 3, s2
-// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
v_add_max_i32 v2, s4, 4, v2
-// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
v_add_max_i32 v2, v4, v7, 12345
-// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_u32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04]
v_add_min_u32 v2, s4, v7, v8
-// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_u32 v2, v4, 0, 1
-// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
v_add_min_u32 v2, v4, 3, s2
-// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
v_add_min_u32 v2, s4, 4, v2
-// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
v_add_min_u32 v2, v4, v7, 12345
-// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_u32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04]
v_add_max_u32 v2, s4, v7, v8
-// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
v_add_max_u32 v2, v4, 0, 1
-// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
v_add_max_u32 v2, v4, 3, s2
-// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
v_add_max_u32 v2, s4, 4, v2
-// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
v_add_max_u32 v2, v4, v7, 12345
-// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_u32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04]
v_cvt_pk_bf16_f32 v5, v1, v2
// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
index 29bfa54f2c10d..7af0bfe5b5327 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
@@ -237,64 +237,76 @@
# GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04
-# GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+# GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04
-# GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+# GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02
-# GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+# GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00
-# GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+# GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
-# GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+# GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04
+# GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04]
+
+0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04
+# GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04]
0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04
-# GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+# GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04
-# GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+# GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02
-# GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+# GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00
-# GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+# GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
-# GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+# GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04
-# GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
+# GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04
-# GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
+# GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02
-# GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
+# GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00
-# GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
+# GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
-# GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+# GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04
+# GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04]
0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04
-# GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
+# GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04
-# GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
+# GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02
-# GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
+# GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00
-# GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
+# GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
-# GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+# GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04
+# GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04]
0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf
# GFX1250: v_cvt_pk_bf16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf]
More information about the llvm-commits
mailing list