[PATCH] D26670: AMDGPU/SI: Fix pattern for i16 = sign_extend i1
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 15 07:27:19 PST 2016
tstellarAMD created this revision.
tstellarAMD added a reviewer: arsenm.
tstellarAMD added a subscriber: llvm-commits.
Herald added subscribers: tony-tye, yaxunl, nhaehnle, wdng, kzhuravl.
https://reviews.llvm.org/D26670
Files:
lib/Target/AMDGPU/VOP2Instructions.td
test/CodeGen/AMDGPU/sign_extend.ll
Index: test/CodeGen/AMDGPU/sign_extend.ll
===================================================================
--- test/CodeGen/AMDGPU/sign_extend.ll
+++ test/CodeGen/AMDGPU/sign_extend.ll
@@ -11,6 +11,25 @@
ret void
}
+; XGCN-LABEL: {{^}}s_sext_i1_to_i16:
+; XGCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, s
+;define void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+; %cmp = icmp eq i32 %a, %b
+; %sext = sext i1 %cmp to i16
+; store i16 %sext, i16 addrspace(1)* %out, align 4
+; ret void
+;}
+
+; XGCN-LABEL: {{^}}v_sext_i1_to_i16:
+; XGCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, s
+;define void @v_sext_i1_to_i16(i16 addrspace(1)* %out) nounwind {
+; %id = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+; %cmp = icmp eq i32 %id, 5
+; %sext = sext i1 %cmp to i16
+; store i16 %sext, i16 addrspace(1)* %out, align 4
+; ret void
+;}
+
; GCN-LABEL: {{^}}test_s_sext_i32_to_i64:
; GCN: s_ashr_i32
; GCN: s_endpgm
@@ -72,6 +91,22 @@
ret void
}
+; The purpose of this test is to make sure the i16 = sign_extend i1 node
+; makes it all the way through the legalizer/optimizer so that we can verify
+; it is selected correctly. In s_sext_i1_to_i16, the sign_extend node
+; is optimized to a select very early.
+; GCN-LABEL: {{^}}s_sext_i1_to_i16_with_and:
+; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1
+; GCN-NEXT: buffer_store_short [[RESULT]]
+define void @s_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+ %cmp0 = icmp eq i32 %a, %b
+ %cmp1 = icmp eq i32 %c, %d
+ %cmp = and i1 %cmp0, %cmp1
+ %sext = sext i1 %cmp to i16
+ store i16 %sext, i16 addrspace(1)* %out
+ ret void
+}
+
; GCN-LABEL: {{^}}s_sext_v4i8_to_v4i32:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN-DAG: s_bfe_i32 [[EXT2:s[0-9]+]], [[VAL]], 0x80010
Index: lib/Target/AMDGPU/VOP2Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP2Instructions.td
+++ lib/Target/AMDGPU/VOP2Instructions.td
@@ -433,9 +433,13 @@
defm : Bits_OpsRev_i16_Pats<sra, V_ASHRREV_B16_e32>;
def : ZExt_i16_i1_Pat<zext>;
-def : ZExt_i16_i1_Pat<sext>;
def : ZExt_i16_i1_Pat<anyext>;
+def : Pat <
+ (i16 (sext i1:$src)),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)
+>;
+
} // End Predicates = [isVI]
//===----------------------------------------------------------------------===//
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D26670.77997.patch
Type: text/x-patch
Size: 2384 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161115/ae1ff0b9/attachment.bin>
More information about the llvm-commits
mailing list