[llvm] r312496 - [x86] add test for unnecessary cmp + masked store; NFC
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 4 10:21:17 PDT 2017
Author: spatel
Date: Mon Sep 4 10:21:17 2017
New Revision: 312496
URL: http://llvm.org/viewvc/llvm-project?rev=312496&view=rev
Log:
[x86] add test for unnecessary cmp + masked store; NFC
As noted in PR11210:
https://bugs.llvm.org/show_bug.cgi?id=11210
...fixing this should allow us to eliminate x86-specific masked store intrinsics in IR.
(Although more testing will be needed to confirm that.)
Modified:
llvm/trunk/test/CodeGen/X86/masked_memop.ll
Modified: llvm/trunk/test/CodeGen/X86/masked_memop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_memop.ll?rev=312496&r1=312495&r2=312496&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_memop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_memop.ll Mon Sep 4 10:21:17 2017
@@ -1140,6 +1140,34 @@ define <8 x double> @load_one_mask_bit_s
ret <8 x double> %res
}
+; FIXME: The mask bit for each data element is the most significant bit of the mask operand, so a compare isn't needed.
+
+define void @trunc_mask(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y, <4 x i32> %mask) {
+; AVX-LABEL: trunc_mask:
+; AVX: ## BB#0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vmaskmovps %xmm0, %xmm1, (%rdi)
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: trunc_mask:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vmaskmovps %xmm0, %xmm1, (%rdi)
+; AVX512F-NEXT: retq
+;
+; SKX-LABEL: trunc_mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vpcmpgtd %xmm2, %xmm1, %k1
+; SKX-NEXT: vmovups %xmm0, (%rdi) {%k1}
+; SKX-NEXT: retq
+ %bool_mask = icmp slt <4 x i32> %mask, zeroinitializer
+ call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %x, <4 x float>* %ptr, i32 1, <4 x i1> %bool_mask)
+ ret void
+}
+
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>)
More information about the llvm-commits mailing list