[PATCH] D27423: AMDGPU: Select i16 instructions to VOP3 forms
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 5 11:37:57 PST 2016
arsenm created this revision.
arsenm added a subscriber: llvm-commits.
Herald added a reviewer: tstellarAMD.
Herald added subscribers: tony-tye, yaxunl, nhaehnle, wdng, kzhuravl.
The i16 instruction patterns were selecting directly to the VOP2 (_e32)
form instead of the VOP3 (_e64) form, unlike the i32 instructions. This
fixes regressions in future commits where an immediate isn't folded
because it was initially used for the second operand.
Because uniform 16-bit operations are promoted to i32, it's
difficult to get a simple testcase where this matters.
https://reviews.llvm.org/D27423
Files:
lib/Target/AMDGPU/VOP2Instructions.td
test/CodeGen/AMDGPU/add.i16.ll
Index: test/CodeGen/AMDGPU/add.i16.ll
===================================================================
--- test/CodeGen/AMDGPU/add.i16.ll
+++ test/CodeGen/AMDGPU/add.i16.ll
@@ -4,7 +4,7 @@
; GCN-LABEL: {{^}}v_test_add_i16:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
define void @v_test_add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -67,7 +67,7 @@
; GCN-LABEL: {{^}}v_test_add_i16_zext_to_i32:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; VI-NEXT: buffer_store_dword [[ADD]]
define void @v_test_add_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -86,7 +86,7 @@
; GCN-LABEL: {{^}}v_test_add_i16_zext_to_i64:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI-DAG: v_add_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]]
+; VI-DAG: v_add_u16_e32 v[[ADD:[0-9]+]], [[B]], [[A]]
; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define void @v_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
@@ -106,7 +106,7 @@
; GCN-LABEL: {{^}}v_test_add_i16_sext_to_i32:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; VI-NEXT: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: buffer_store_dword [[SEXT]]
define void @v_test_add_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
@@ -126,7 +126,7 @@
; GCN-LABEL: {{^}}v_test_add_i16_sext_to_i64:
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
Index: lib/Target/AMDGPU/VOP2Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP2Instructions.td
+++ lib/Target/AMDGPU/VOP2Instructions.td
@@ -418,27 +418,27 @@
let Predicates = [isVI] in {
-defm : Arithmetic_i16_Pats<add, V_ADD_U16_e32>;
-defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e32>;
-defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e32>;
-defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e32>;
-defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e32>;
-defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e32>;
-defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e32>;
+defm : Arithmetic_i16_Pats<add, V_ADD_U16_e64>;
+defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e64>;
+defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e64>;
+defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e64>;
+defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e64>;
+defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e64>;
+defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e64>;
def : Pat <
(and i16:$src0, i16:$src1),
- (V_AND_B32_e32 $src0, $src1)
+ (V_AND_B32_e64 $src0, $src1)
>;
def : Pat <
(or i16:$src0, i16:$src1),
- (V_OR_B32_e32 $src0, $src1)
+ (V_OR_B32_e64 $src0, $src1)
>;
def : Pat <
(xor i16:$src0, i16:$src1),
- (V_XOR_B32_e32 $src0, $src1)
+ (V_XOR_B32_e64 $src0, $src1)
>;
defm : Bits_OpsRev_i16_Pats<shl, V_LSHLREV_B16_e32>;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D27423.80301.patch
Type: text/x-patch
Size: 3780 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161205/644e1f75/attachment.bin>
More information about the llvm-commits
mailing list