[llvm] 77aec3b - [AMDGPU][MC][GFX8+] Enabled clamp for v_add_u16, v_sub_u16 and v_subrev_u16
Dmitry Preobrazhensky via llvm-commits
llvm-commits at lists.llvm.org
Mon May 25 10:34:26 PDT 2020
Author: Dmitry Preobrazhensky
Date: 2020-05-25T19:55:38+03:00
New Revision: 77aec3b4c0e63b07d98dbb1aeb693d200e769a05
URL: https://github.com/llvm/llvm-project/commit/77aec3b4c0e63b07d98dbb1aeb693d200e769a05
DIFF: https://github.com/llvm/llvm-project/commit/77aec3b4c0e63b07d98dbb1aeb693d200e769a05.diff
LOG: [AMDGPU][MC][GFX8+] Enabled clamp for v_add_u16, v_sub_u16 and v_subrev_u16
See https://bugs.llvm.org/show_bug.cgi?id=45926
Reviewers: arsenm, rampitec, vpykhtin
Differential Revision: https://reviews.llvm.org/D80430
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir
llvm/test/MC/AMDGPU/gfx10_asm_all.s
llvm/test/MC/AMDGPU/vop2.s
llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 397791677eda..b988de596c64 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2266,6 +2266,7 @@ def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
+def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], 0, /*EnableClamp=*/1>;
def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 594e11ec4f54..c1ce1b755322 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -626,9 +626,9 @@ defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub
defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>;
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
} // End FPDPRounding = 1
-defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16, add>;
-defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16, sub>;
-defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
+defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
+defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>;
+defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir
index cc48e9126c9b..ad8f5df33ab0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir
@@ -19,14 +19,14 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GFX6: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]]
; GFX10-LABEL: name: add_s16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: $vcc_hi = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GFX10: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -51,14 +51,14 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GFX6: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]]
; GFX10-LABEL: name: add_s16_zext_to_s32
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: $vcc_hi = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_ADD_U16_e64_]], 0, 16, implicit $exec
; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]]
%0:vgpr(s32) = COPY $vgpr0
@@ -84,13 +84,13 @@ body: |
; GFX6-LABEL: name: add_s16_neg_inline_const_64
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec
+ ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec
; GFX6: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]]
; GFX10-LABEL: name: add_s16_neg_inline_const_64
; GFX10: liveins: $vgpr0
; GFX10: $vcc_hi = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec
+ ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec
; GFX10: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
@@ -113,13 +113,13 @@ body: |
; GFX6-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec
+ ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec
; GFX6: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]]
; GFX10-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32
; GFX10: liveins: $vgpr0
; GFX10: $vcc_hi = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec
+ ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec
; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_SUB_U16_e64_]], 0, 16, implicit $exec
; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]]
%0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_all.s b/llvm/test/MC/AMDGPU/gfx10_asm_all.s
index 13072ad0c049..aa09c30903ba 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_all.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_all.s
@@ -58995,6 +58995,9 @@ v_add_nc_u16 v5, v1, 0.5
v_add_nc_u16 v5, v1, -4.0
// GFX10: encoding: [0x05,0x00,0x03,0xd7,0x01,0xef,0x01,0x00]
+v_add_nc_u16 v5, v1, -4.0 clamp
+// GFX10: encoding: [0x05,0x80,0x03,0xd7,0x01,0xef,0x01,0x00]
+
v_sub_nc_u16 v5, v1, v2
// GFX10: encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
@@ -59073,6 +59076,9 @@ v_sub_nc_u16 v5, v1, 0.5
v_sub_nc_u16 v5, v1, -4.0
// GFX10: encoding: [0x05,0x00,0x04,0xd7,0x01,0xef,0x01,0x00]
+v_sub_nc_u16 v5, v1, -4.0 clamp
+// GFX10: encoding: [0x05,0x80,0x04,0xd7,0x01,0xef,0x01,0x00]
+
v_mul_lo_u16 v5, v1, v2
// GFX10: encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s
index 552bc468b63d..1505c8cfa44d 100644
--- a/llvm/test/MC/AMDGPU/vop2.s
+++ b/llvm/test/MC/AMDGPU/vop2.s
@@ -435,16 +435,31 @@ v_madak_f16 v1, v2, v3, 64.0
// VI: v_add_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
v_add_u16_e32 v1, v2, v3
+// NOSICI: error: invalid operand for instruction
+// NOSICI: v_add_u16 v1, v2, v3 clamp
+// VI: v_add_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x26,0xd1,0x02,0x07,0x02,0x00]
+v_add_u16 v1, v2, v3 clamp
+
// NOSICI: error: instruction not supported on this GPU
// NOSICI: v_sub_u16_e32 v1, v2, v3
// VI: v_sub_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
v_sub_u16_e32 v1, v2, v3
+// NOSICI: error: invalid operand for instruction
+// NOSICI: v_sub_u16 v1, v2, v3 clamp
+// VI: v_sub_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x27,0xd1,0x02,0x07,0x02,0x00]
+v_sub_u16 v1, v2, v3 clamp
+
// NOSICI: error: instruction not supported on this GPU
// NOSICI: v_subrev_u16_e32 v1, v2, v3
// VI: v_subrev_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
v_subrev_u16_e32 v1, v2, v3
+// NOSICI: error: invalid operand for instruction
+// NOSICI: v_subrev_u16 v1, v2, v3 clamp
+// VI: v_subrev_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x28,0xd1,0x02,0x07,0x02,0x00]
+v_subrev_u16 v1, v2, v3 clamp
+
// NOSICI: error: instruction not supported on this GPU
// NOSICI: v_mul_lo_u16_e32 v1, v2, v3
// VI: v_mul_lo_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
index 6b920345d7b8..89cbaa7f0299 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
@@ -21401,6 +21401,9 @@
# GFX10: v_add_nc_u16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0x04,0x02,0x00]
0x05,0x00,0x03,0xd7,0x6a,0x04,0x02,0x00
+# GFX10: v_add_nc_u16_e64 v5, vcc_lo, v2 clamp ; encoding: [0x05,0x80,0x03,0xd7,0x6a,0x04,0x02,0x00]
+0x05,0x80,0x03,0xd7,0x6a,0x04,0x02,0x00
+
# GFX10: v_add_nc_u32_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x4b]
0x01,0x05,0xfe,0x4b
@@ -95808,6 +95811,9 @@
# GFX10: v_sub_nc_u16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0x04,0x02,0x00]
0x05,0x00,0x04,0xd7,0x6a,0x04,0x02,0x00
+# GFX10: v_sub_nc_u16_e64 v5, vcc_lo, v2 clamp ; encoding: [0x05,0x80,0x04,0xd7,0x6a,0x04,0x02,0x00]
+0x05,0x80,0x04,0xd7,0x6a,0x04,0x02,0x00
+
# GFX10: v_sub_nc_u32_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x4d]
0x01,0x05,0xfe,0x4d
diff --git a/llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt b/llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt
index 4f44326a69bf..eafbece4d96c 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/vop2_vi.txt
@@ -222,12 +222,21 @@
# VI: v_add_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
0x02 0x07 0x02 0x4c
+# VI: v_add_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x26,0xd1,0x02,0x07,0x02,0x00]
+0x01 0x80 0x26 0xd1 0x02 0x07 0x02 0x00
+
# VI: v_sub_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
0x02 0x07 0x02 0x4e
+# VI: v_sub_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x27,0xd1,0x02,0x07,0x02,0x00]
+0x01 0x80 0x27 0xd1 0x02 0x07 0x02 0x00
+
# VI: v_subrev_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
0x02 0x07 0x02 0x50
+# VI: v_subrev_u16_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x28,0xd1,0x02,0x07,0x02,0x00]
+0x01 0x80 0x28 0xd1 0x02 0x07 0x02 0x00
+
# VI: v_mul_lo_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
0x02 0x07 0x02 0x52
More information about the llvm-commits
mailing list