[llvm] r237536 - AVX-512: fixed a bug in mask operations - (i1 1) pattern

Elena Demikhovsky elena.demikhovsky at intel.com
Sun May 17 00:28:52 PDT 2015


Author: delena
Date: Sun May 17 02:28:51 2015
New Revision: 237536

URL: http://llvm.org/viewvc/llvm-project?rev=237536&view=rev
Log:
AVX-512: fixed a bug in mask operations - (i1 1) pattern
Filling the k-register with an all-ones value was wrong;
(i1 1) should set only one bit in the mask register.


Added:
    llvm/trunk/test/CodeGen/X86/avx512-mask-bugfix.ll   (with props)
Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=237536&r1=237535&r2=237536&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun May 17 02:28:51 2015
@@ -2156,8 +2156,8 @@ let Predicates = [HasAVX512] in {
   def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
   def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
   def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
-  def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
-  def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
+  def : Pat<(i1 1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
+  def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
 }
 def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
           (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;

Added: llvm/trunk/test/CodeGen/X86/avx512-mask-bugfix.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-bugfix.ll?rev=237536&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-bugfix.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-bugfix.ll Sun May 17 02:28:51 2015
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl  | FileCheck %s
+
+; ModuleID = 'foo.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) #0
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) #0
+
+; Function Attrs: nounwind
+define void @foo(float* noalias %aFOO, float %b, i32 %a) {
+allocas:
+  %full_mask_memory.i57 = alloca <8 x float>
+  %return_value_memory.i60 = alloca i1
+  %cmp.i = icmp eq i32 %a, 65535
+  br i1 %cmp.i, label %all_on, label %some_on
+
+all_on:
+  %mask0 = load <8 x float>, <8 x float>* %full_mask_memory.i57
+  %v0.i.i.i70 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %mask0) #0
+  %allon.i.i76 = icmp eq i32 %v0.i.i.i70, 65535
+  br i1 %allon.i.i76, label %check_neighbors.i.i121, label %domixed.i.i100
+
+domixed.i.i100: 
+  br label %check_neighbors.i.i121
+
+check_neighbors.i.i121: 
+  %v1.i5.i.i116 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %mask0) #0
+  %alleq.i.i120 = icmp eq i32 %v1.i5.i.i116, 65535
+  br i1 %alleq.i.i120, label %all_equal.i.i123, label %not_all_equal.i.i124
+
+; CHECK: kxnorw  %k0, %k0, %k0
+; CHECK: kshiftrw        $15, %k0, %k0
+; CHECK: jmp
+; CHECK: kxorw   %k0, %k0, %k0
+
+all_equal.i.i123:
+  br label %reduce_equal___vyi.exit128
+
+not_all_equal.i.i124:        
+  br label %reduce_equal___vyi.exit128
+
+reduce_equal___vyi.exit128:
+  %calltmp2.i125 = phi i1 [ true, %all_equal.i.i123 ], [ false, %not_all_equal.i.i124 ]
+  store i1 %calltmp2.i125, i1* %return_value_memory.i60
+  %return_value.i126 = load i1, i1* %return_value_memory.i60
+  %. = select i1 %return_value.i126, i32 1, i32 0
+  %select_to_float = sitofp i32 %. to float
+  ret void
+
+some_on:
+  ret void
+}
+

Propchange: llvm/trunk/test/CodeGen/X86/avx512-mask-bugfix.ll
------------------------------------------------------------------------------
    svn:executable = *





More information about the llvm-commits mailing list