[llvm] r319745 - [X86] Fix a bug in handling GRXX subclasses in Domain Reassignment pass

Guy Blank via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 5 01:08:24 PST 2017


Author: guyblank
Date: Tue Dec  5 01:08:24 2017
New Revision: 319745

URL: http://llvm.org/viewvc/llvm-project?rev=319745&view=rev
Log:
[X86] Fix a bug in handling GRXX subclasses in Domain Reassignment pass

When trying to determine the Mask register class corresponding to a GPR
register class, only the exact top-level GRXX classes were handled, not
their subclasses. This caused an assertion failure in some scenarios.

Differential Revision: https://reviews.llvm.org/D40290
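
For context: the old code compared SrcRC against the four top-level GRXX
classes by pointer, so a strict subclass such as GR8_ABCD_L (likely what
the pass sees for i8 values in 32-bit mode, where only AL/BL/CL/DL are
encodable) fell through to llvm_unreachable. The self-contained sketch
below models that failure mode; MockRegClass is a simplified stand-in for
llvm::TargetRegisterClass (a single super-class chain rather than the real
subclass lattice), not the actual API.

#include <cassert>
#include <cstdio>

// Simplified stand-in for llvm::TargetRegisterClass: each class optionally
// names a super-class, so GR8_ABCD_L chains up to GR8.
struct MockRegClass {
  const char *Name;
  const MockRegClass *Super; // nullptr for a top-level class like GR8

  // Same semantics as TargetRegisterClass::hasSubClassEq: true when RC is
  // this class or (transitively) one of its subclasses.
  bool hasSubClassEq(const MockRegClass *RC) const {
    for (; RC; RC = RC->Super)
      if (RC == this)
        return true;
    return false;
  }
};

const MockRegClass GR8 = {"GR8", nullptr};
const MockRegClass GR8_ABCD_L = {"GR8_ABCD_L", &GR8};

int main() {
  // What the pass can see in 32-bit mode: a strict subclass of GR8.
  const MockRegClass *SrcRC = &GR8_ABCD_L;

  // Old check: pointer equality misses the subclass, so the pre-patch
  // getDstRC fell through to llvm_unreachable("add register class").
  assert(!(SrcRC == &GR8));

  // New check: the subclass is accepted and maps to VK8.
  assert(GR8.hasSubClassEq(SrcRC));
  std::puts("GR8_ABCD_L is handled as a GR8 subclass");
}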

Modified:
    llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp
    llvm/trunk/test/CodeGen/X86/gpr-to-mask.ll

Modified: llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp?rev=319745&r1=319744&r2=319745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp Tue Dec  5 01:08:24 2017
@@ -70,13 +70,13 @@ static RegDomain getDomain(const TargetR
 static const TargetRegisterClass *getDstRC(const TargetRegisterClass *SrcRC,
                                            RegDomain Domain) {
   assert(Domain == MaskDomain && "add domain");
-  if (SrcRC == &X86::GR8RegClass)
+  if (X86::GR8RegClass.hasSubClassEq(SrcRC))
     return &X86::VK8RegClass;
-  if (SrcRC == &X86::GR16RegClass)
+  if (X86::GR16RegClass.hasSubClassEq(SrcRC))
     return &X86::VK16RegClass;
-  if (SrcRC == &X86::GR32RegClass)
+  if (X86::GR32RegClass.hasSubClassEq(SrcRC))
     return &X86::VK32RegClass;
-  if (SrcRC == &X86::GR64RegClass)
+  if (X86::GR64RegClass.hasSubClassEq(SrcRC))
     return &X86::VK64RegClass;
   llvm_unreachable("add register class");
   return nullptr;
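
Two notes on the new predicate, worth keeping in mind when reading the
hunk above: the query is directional (A.hasSubClassEq(B) asks whether B is
A or a subclass of A, which is why the top-level GRXX class is the receiver
and SrcRC the argument), and it remains a cheap constant-time check, since
the header implements it as a test against a TableGen-generated bit vector.
A rough sketch of that mechanism, paraphrased rather than copied from the
LLVM headers; the toy class IDs and masks below are invented for
illustration:

#include <cassert>
#include <cstdint>

// Paraphrase of TargetRegisterClass::hasSubClassEq: each register class
// carries a generated bit vector with one bit per register class in the
// target; bit N is set when class N is this class or a subclass of it,
// so the query is a constant-time mask test.
struct SketchRegClass {
  unsigned ID;                  // dense class index assigned by TableGen
  const uint32_t *SubClassMask; // generated bit vector

  bool hasSubClassEq(const SketchRegClass *RC) const {
    unsigned I = RC->ID;
    return (SubClassMask[I / 32] >> (I % 32)) & 1;
  }
};

int main() {
  // Toy target with two classes: 0 = "GR8", 1 = "GR8_ABCD_L" (a subclass).
  static const uint32_t GR8Mask[]     = {0x3}; // itself (bit 0) + subclass (bit 1)
  static const uint32_t GR8AbcdMask[] = {0x2}; // only itself (bit 1)
  SketchRegClass GR8     = {0, GR8Mask};
  SketchRegClass GR8Abcd = {1, GR8AbcdMask};

  assert(GR8.hasSubClassEq(&GR8Abcd));  // subclass accepted, as in the patch
  assert(GR8.hasSubClassEq(&GR8));      // a class is "sub-class-eq" of itself
  assert(!GR8Abcd.hasSubClassEq(&GR8)); // reversed receiver: not a subclass
}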

Modified: llvm/trunk/test/CodeGen/X86/gpr-to-mask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/gpr-to-mask.ll?rev=319745&r1=319744&r2=319745&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/gpr-to-mask.ll (original)
+++ llvm/trunk/test/CodeGen/X86/gpr-to-mask.ll Tue Dec  5 01:08:24 2017
@@ -1,20 +1,40 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-64
+; RUN: llc -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-32
 
 define void @test_fcmp_storefloat(i1 %cond, float* %fptr, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
-; CHECK-LABEL: test_fcmp_storefloat:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    je .LBB0_2
-; CHECK-NEXT:  # %bb.1: # %if
-; CHECK-NEXT:    vcmpeqss %xmm3, %xmm2, %k1
-; CHECK-NEXT:    jmp .LBB0_3
-; CHECK-NEXT:  .LBB0_2: # %else
-; CHECK-NEXT:    vcmpeqss %xmm5, %xmm4, %k1
-; CHECK-NEXT:  .LBB0_3: # %exit
-; CHECK-NEXT:    vmovss %xmm0, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT:    vmovss %xmm1, (%rsi)
-; CHECK-NEXT:    retq
+; X86-64-LABEL: test_fcmp_storefloat:
+; X86-64:       # %bb.0: # %entry
+; X86-64-NEXT:    testb $1, %dil
+; X86-64-NEXT:    je .LBB0_2
+; X86-64-NEXT:  # %bb.1: # %if
+; X86-64-NEXT:    vcmpeqss %xmm3, %xmm2, %k1
+; X86-64-NEXT:    jmp .LBB0_3
+; X86-64-NEXT:  .LBB0_2: # %else
+; X86-64-NEXT:    vcmpeqss %xmm5, %xmm4, %k1
+; X86-64-NEXT:  .LBB0_3: # %exit
+; X86-64-NEXT:    vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; X86-64-NEXT:    vmovss %xmm1, (%rsi)
+; X86-64-NEXT:    retq
+;
+; X86-32-LABEL: test_fcmp_storefloat:
+; X86-32:       # %bb.0: # %entry
+; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-32-NEXT:    je .LBB0_2
+; X86-32-NEXT:  # %bb.1: # %if
+; X86-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1
+; X86-32-NEXT:    jmp .LBB0_3
+; X86-32-NEXT:  .LBB0_2: # %else
+; X86-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1
+; X86-32-NEXT:  .LBB0_3: # %exit
+; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X86-32-NEXT:    vmovss %xmm0, (%eax)
+; X86-32-NEXT:    retl
 entry:
   br i1 %cond, label %if, label %else
 
@@ -34,20 +54,38 @@ exit:
 }
 
 define void @test_fcmp_storei1(i1 %cond, float* %fptr, i1* %iptr, float %f1, float %f2, float %f3, float %f4) {
-; CHECK-LABEL: test_fcmp_storei1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    je .LBB1_2
-; CHECK-NEXT:  # %bb.1: # %if
-; CHECK-NEXT:    vcmpeqss %xmm1, %xmm0, %k0
-; CHECK-NEXT:    jmp .LBB1_3
-; CHECK-NEXT:  .LBB1_2: # %else
-; CHECK-NEXT:    vcmpeqss %xmm3, %xmm2, %k0
-; CHECK-NEXT:  .LBB1_3: # %exit
-; CHECK-NEXT:    kmovd %k0, %eax
-; CHECK-NEXT:    andb $1, %al
-; CHECK-NEXT:    movb %al, (%rdx)
-; CHECK-NEXT:    retq
+; X86-64-LABEL: test_fcmp_storei1:
+; X86-64:       # %bb.0: # %entry
+; X86-64-NEXT:    testb $1, %dil
+; X86-64-NEXT:    je .LBB1_2
+; X86-64-NEXT:  # %bb.1: # %if
+; X86-64-NEXT:    vcmpeqss %xmm1, %xmm0, %k0
+; X86-64-NEXT:    jmp .LBB1_3
+; X86-64-NEXT:  .LBB1_2: # %else
+; X86-64-NEXT:    vcmpeqss %xmm3, %xmm2, %k0
+; X86-64-NEXT:  .LBB1_3: # %exit
+; X86-64-NEXT:    kmovd %k0, %eax
+; X86-64-NEXT:    andb $1, %al
+; X86-64-NEXT:    movb %al, (%rdx)
+; X86-64-NEXT:    retq
+;
+; X86-32-LABEL: test_fcmp_storei1:
+; X86-32:       # %bb.0: # %entry
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-32-NEXT:    je .LBB1_2
+; X86-32-NEXT:  # %bb.1: # %if
+; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0
+; X86-32-NEXT:    jmp .LBB1_3
+; X86-32-NEXT:  .LBB1_2: # %else
+; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0
+; X86-32-NEXT:  .LBB1_3: # %exit
+; X86-32-NEXT:    kmovd %k0, %ecx
+; X86-32-NEXT:    andb $1, %cl
+; X86-32-NEXT:    movb %cl, (%eax)
+; X86-32-NEXT:    retl
 entry:
   br i1 %cond, label %if, label %else
 
@@ -66,21 +104,42 @@ exit:
 }
 
 define void @test_load_add(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float %f1, float %f2)  {
-; CHECK-LABEL: test_load_add:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    je .LBB2_2
-; CHECK-NEXT:  # %bb.1: # %if
-; CHECK-NEXT:    kmovb (%rdx), %k0
-; CHECK-NEXT:    kmovb (%rcx), %k1
-; CHECK-NEXT:    kaddb %k1, %k0, %k1
-; CHECK-NEXT:    jmp .LBB2_3
-; CHECK-NEXT:  .LBB2_2: # %else
-; CHECK-NEXT:    kmovb (%rcx), %k1
-; CHECK-NEXT:  .LBB2_3: # %exit
-; CHECK-NEXT:    vmovss %xmm0, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT:    vmovss %xmm1, (%rsi)
-; CHECK-NEXT:    retq
+; X86-64-LABEL: test_load_add:
+; X86-64:       # %bb.0: # %entry
+; X86-64-NEXT:    testb $1, %dil
+; X86-64-NEXT:    je .LBB2_2
+; X86-64-NEXT:  # %bb.1: # %if
+; X86-64-NEXT:    kmovb (%rdx), %k0
+; X86-64-NEXT:    kmovb (%rcx), %k1
+; X86-64-NEXT:    kaddb %k1, %k0, %k1
+; X86-64-NEXT:    jmp .LBB2_3
+; X86-64-NEXT:  .LBB2_2: # %else
+; X86-64-NEXT:    kmovb (%rcx), %k1
+; X86-64-NEXT:  .LBB2_3: # %exit
+; X86-64-NEXT:    vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; X86-64-NEXT:    vmovss %xmm1, (%rsi)
+; X86-64-NEXT:    retq
+;
+; X86-32-LABEL: test_load_add:
+; X86-32:       # %bb.0: # %entry
+; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-32-NEXT:    je .LBB2_2
+; X86-32-NEXT:  # %bb.1: # %if
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-32-NEXT:    kmovb (%edx), %k0
+; X86-32-NEXT:    kmovb (%ecx), %k1
+; X86-32-NEXT:    kaddb %k1, %k0, %k1
+; X86-32-NEXT:    jmp .LBB2_3
+; X86-32-NEXT:  .LBB2_2: # %else
+; X86-32-NEXT:    kmovb (%ecx), %k1
+; X86-32-NEXT:  .LBB2_3: # %exit
+; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X86-32-NEXT:    vmovss %xmm0, (%eax)
+; X86-32-NEXT:    retl
 entry:
   br i1 %cond, label %if, label %else
 
@@ -102,19 +161,37 @@ exit:
 }
 
 define void @test_load_i1(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float %f1, float %f2)  {
-; CHECK-LABEL: test_load_i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    je .LBB3_2
-; CHECK-NEXT:  # %bb.1: # %if
-; CHECK-NEXT:    kmovb (%rdx), %k1
-; CHECK-NEXT:    jmp .LBB3_3
-; CHECK-NEXT:  .LBB3_2: # %else
-; CHECK-NEXT:    kmovb (%rcx), %k1
-; CHECK-NEXT:  .LBB3_3: # %exit
-; CHECK-NEXT:    vmovss %xmm0, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT:    vmovss %xmm1, (%rsi)
-; CHECK-NEXT:    retq
+; X86-64-LABEL: test_load_i1:
+; X86-64:       # %bb.0: # %entry
+; X86-64-NEXT:    testb $1, %dil
+; X86-64-NEXT:    je .LBB3_2
+; X86-64-NEXT:  # %bb.1: # %if
+; X86-64-NEXT:    kmovb (%rdx), %k1
+; X86-64-NEXT:    jmp .LBB3_3
+; X86-64-NEXT:  .LBB3_2: # %else
+; X86-64-NEXT:    kmovb (%rcx), %k1
+; X86-64-NEXT:  .LBB3_3: # %exit
+; X86-64-NEXT:    vmovss %xmm0, %xmm0, %xmm1 {%k1}
+; X86-64-NEXT:    vmovss %xmm1, (%rsi)
+; X86-64-NEXT:    retq
+;
+; X86-32-LABEL: test_load_i1:
+; X86-32:       # %bb.0: # %entry
+; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-32-NEXT:    je .LBB3_2
+; X86-32-NEXT:  # %bb.1: # %if
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:    jmp .LBB3_3
+; X86-32-NEXT:  .LBB3_2: # %else
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:  .LBB3_3: # %exit
+; X86-32-NEXT:    kmovb (%ecx), %k1
+; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X86-32-NEXT:    vmovss %xmm0, (%eax)
+; X86-32-NEXT:    retl
 entry:
   br i1 %cond, label %if, label %else
 
@@ -134,19 +211,35 @@ exit:
 }
 
 define void @test_loadi1_storei1(i1 %cond, i1* %iptr1, i1* %iptr2, i1* %iptr3)  {
-; CHECK-LABEL: test_loadi1_storei1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    je .LBB4_2
-; CHECK-NEXT:  # %bb.1: # %if
-; CHECK-NEXT:    movb (%rsi), %al
-; CHECK-NEXT:    jmp .LBB4_3
-; CHECK-NEXT:  .LBB4_2: # %else
-; CHECK-NEXT:    movb (%rdx), %al
-; CHECK-NEXT:  .LBB4_3: # %exit
-; CHECK-NEXT:    andb $1, %al
-; CHECK-NEXT:    movb %al, (%rcx)
-; CHECK-NEXT:    retq
+; X86-64-LABEL: test_loadi1_storei1:
+; X86-64:       # %bb.0: # %entry
+; X86-64-NEXT:    testb $1, %dil
+; X86-64-NEXT:    je .LBB4_2
+; X86-64-NEXT:  # %bb.1: # %if
+; X86-64-NEXT:    movb (%rsi), %al
+; X86-64-NEXT:    jmp .LBB4_3
+; X86-64-NEXT:  .LBB4_2: # %else
+; X86-64-NEXT:    movb (%rdx), %al
+; X86-64-NEXT:  .LBB4_3: # %exit
+; X86-64-NEXT:    andb $1, %al
+; X86-64-NEXT:    movb %al, (%rcx)
+; X86-64-NEXT:    retq
+;
+; X86-32-LABEL: test_loadi1_storei1:
+; X86-32:       # %bb.0: # %entry
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-32-NEXT:    je .LBB4_2
+; X86-32-NEXT:  # %bb.1: # %if
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:    jmp .LBB4_3
+; X86-32-NEXT:  .LBB4_2: # %else
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:  .LBB4_3: # %exit
+; X86-32-NEXT:    movb (%ecx), %cl
+; X86-32-NEXT:    andb $1, %cl
+; X86-32-NEXT:    movb %cl, (%eax)
+; X86-32-NEXT:    retl
 entry:
   br i1 %cond, label %if, label %else
 
@@ -165,23 +258,44 @@ exit:
 }
 
 define void @test_shl1(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
-; CHECK-LABEL: test_shl1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
-; CHECK-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    je .LBB5_2
-; CHECK-NEXT:  # %bb.1: # %if
-; CHECK-NEXT:    kmovb (%rsi), %k0
-; CHECK-NEXT:    kaddb %k0, %k0, %k1
-; CHECK-NEXT:    jmp .LBB5_3
-; CHECK-NEXT:  .LBB5_2: # %else
-; CHECK-NEXT:    kmovb (%rdx), %k1
-; CHECK-NEXT:  .LBB5_3: # %exit
-; CHECK-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
-; CHECK-NEXT:    vmovaps %ymm1, (%rcx)
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
+; X86-64-LABEL: test_shl1:
+; X86-64:       # %bb.0: # %entry
+; X86-64-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; X86-64-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; X86-64-NEXT:    testb $1, %dil
+; X86-64-NEXT:    je .LBB5_2
+; X86-64-NEXT:  # %bb.1: # %if
+; X86-64-NEXT:    kmovb (%rsi), %k0
+; X86-64-NEXT:    kaddb %k0, %k0, %k1
+; X86-64-NEXT:    jmp .LBB5_3
+; X86-64-NEXT:  .LBB5_2: # %else
+; X86-64-NEXT:    kmovb (%rdx), %k1
+; X86-64-NEXT:  .LBB5_3: # %exit
+; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
+; X86-64-NEXT:    vzeroupper
+; X86-64-NEXT:    retq
+;
+; X86-32-LABEL: test_shl1:
+; X86-32:       # %bb.0: # %entry
+; X86-32-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; X86-32-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-32-NEXT:    je .LBB5_2
+; X86-32-NEXT:  # %bb.1: # %if
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:    kmovb (%ecx), %k0
+; X86-32-NEXT:    kaddb %k0, %k0, %k1
+; X86-32-NEXT:    jmp .LBB5_3
+; X86-32-NEXT:  .LBB5_2: # %else
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:    kmovb (%ecx), %k1
+; X86-32-NEXT:  .LBB5_3: # %exit
+; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; X86-32-NEXT:    vmovaps %ymm1, (%eax)
+; X86-32-NEXT:    vzeroupper
+; X86-32-NEXT:    retl
 entry:
   br i1 %cond, label %if, label %else
 
@@ -203,24 +317,46 @@ exit:
 }
 
 define void @test_shr1(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
-; CHECK-LABEL: test_shr1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
-; CHECK-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    je .LBB6_2
-; CHECK-NEXT:  # %bb.1: # %if
-; CHECK-NEXT:    movb (%rsi), %al
-; CHECK-NEXT:    shrb %al
-; CHECK-NEXT:    jmp .LBB6_3
-; CHECK-NEXT:  .LBB6_2: # %else
-; CHECK-NEXT:    movb (%rdx), %al
-; CHECK-NEXT:  .LBB6_3: # %exit
-; CHECK-NEXT:    kmovd %eax, %k1
-; CHECK-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
-; CHECK-NEXT:    vmovaps %ymm1, (%rcx)
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
+; X86-64-LABEL: test_shr1:
+; X86-64:       # %bb.0: # %entry
+; X86-64-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; X86-64-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; X86-64-NEXT:    testb $1, %dil
+; X86-64-NEXT:    je .LBB6_2
+; X86-64-NEXT:  # %bb.1: # %if
+; X86-64-NEXT:    movb (%rsi), %al
+; X86-64-NEXT:    shrb %al
+; X86-64-NEXT:    jmp .LBB6_3
+; X86-64-NEXT:  .LBB6_2: # %else
+; X86-64-NEXT:    movb (%rdx), %al
+; X86-64-NEXT:  .LBB6_3: # %exit
+; X86-64-NEXT:    kmovd %eax, %k1
+; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
+; X86-64-NEXT:    vzeroupper
+; X86-64-NEXT:    retq
+;
+; X86-32-LABEL: test_shr1:
+; X86-32:       # %bb.0: # %entry
+; X86-32-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; X86-32-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-32-NEXT:    je .LBB6_2
+; X86-32-NEXT:  # %bb.1: # %if
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:    movb (%ecx), %cl
+; X86-32-NEXT:    shrb %cl
+; X86-32-NEXT:    jmp .LBB6_3
+; X86-32-NEXT:  .LBB6_2: # %else
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:    movb (%ecx), %cl
+; X86-32-NEXT:  .LBB6_3: # %exit
+; X86-32-NEXT:    kmovd %ecx, %k1
+; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; X86-32-NEXT:    vmovaps %ymm1, (%eax)
+; X86-32-NEXT:    vzeroupper
+; X86-32-NEXT:    retl
 entry:
   br i1 %cond, label %if, label %else
 
@@ -242,23 +378,44 @@ exit:
 }
 
 define void @test_shr2(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
-; CHECK-LABEL: test_shr2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
-; CHECK-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    je .LBB7_2
-; CHECK-NEXT:  # %bb.1: # %if
-; CHECK-NEXT:    kmovb (%rsi), %k0
-; CHECK-NEXT:    kshiftrb $2, %k0, %k1
-; CHECK-NEXT:    jmp .LBB7_3
-; CHECK-NEXT:  .LBB7_2: # %else
-; CHECK-NEXT:    kmovb (%rdx), %k1
-; CHECK-NEXT:  .LBB7_3: # %exit
-; CHECK-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
-; CHECK-NEXT:    vmovaps %ymm1, (%rcx)
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
+; X86-64-LABEL: test_shr2:
+; X86-64:       # %bb.0: # %entry
+; X86-64-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; X86-64-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; X86-64-NEXT:    testb $1, %dil
+; X86-64-NEXT:    je .LBB7_2
+; X86-64-NEXT:  # %bb.1: # %if
+; X86-64-NEXT:    kmovb (%rsi), %k0
+; X86-64-NEXT:    kshiftrb $2, %k0, %k1
+; X86-64-NEXT:    jmp .LBB7_3
+; X86-64-NEXT:  .LBB7_2: # %else
+; X86-64-NEXT:    kmovb (%rdx), %k1
+; X86-64-NEXT:  .LBB7_3: # %exit
+; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
+; X86-64-NEXT:    vzeroupper
+; X86-64-NEXT:    retq
+;
+; X86-32-LABEL: test_shr2:
+; X86-32:       # %bb.0: # %entry
+; X86-32-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; X86-32-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-32-NEXT:    je .LBB7_2
+; X86-32-NEXT:  # %bb.1: # %if
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:    kmovb (%ecx), %k0
+; X86-32-NEXT:    kshiftrb $2, %k0, %k1
+; X86-32-NEXT:    jmp .LBB7_3
+; X86-32-NEXT:  .LBB7_2: # %else
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:    kmovb (%ecx), %k1
+; X86-32-NEXT:  .LBB7_3: # %exit
+; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; X86-32-NEXT:    vmovaps %ymm1, (%eax)
+; X86-32-NEXT:    vzeroupper
+; X86-32-NEXT:    retl
 entry:
   br i1 %cond, label %if, label %else
 
@@ -280,23 +437,44 @@ exit:
 }
 
 define void @test_shl(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
-; CHECK-LABEL: test_shl:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
-; CHECK-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    je .LBB8_2
-; CHECK-NEXT:  # %bb.1: # %if
-; CHECK-NEXT:    kmovb (%rsi), %k0
-; CHECK-NEXT:    kshiftlb $6, %k0, %k1
-; CHECK-NEXT:    jmp .LBB8_3
-; CHECK-NEXT:  .LBB8_2: # %else
-; CHECK-NEXT:    kmovb (%rdx), %k1
-; CHECK-NEXT:  .LBB8_3: # %exit
-; CHECK-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
-; CHECK-NEXT:    vmovaps %ymm1, (%rcx)
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
+; X86-64-LABEL: test_shl:
+; X86-64:       # %bb.0: # %entry
+; X86-64-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; X86-64-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; X86-64-NEXT:    testb $1, %dil
+; X86-64-NEXT:    je .LBB8_2
+; X86-64-NEXT:  # %bb.1: # %if
+; X86-64-NEXT:    kmovb (%rsi), %k0
+; X86-64-NEXT:    kshiftlb $6, %k0, %k1
+; X86-64-NEXT:    jmp .LBB8_3
+; X86-64-NEXT:  .LBB8_2: # %else
+; X86-64-NEXT:    kmovb (%rdx), %k1
+; X86-64-NEXT:  .LBB8_3: # %exit
+; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
+; X86-64-NEXT:    vzeroupper
+; X86-64-NEXT:    retq
+;
+; X86-32-LABEL: test_shl:
+; X86-32:       # %bb.0: # %entry
+; X86-32-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; X86-32-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-32-NEXT:    je .LBB8_2
+; X86-32-NEXT:  # %bb.1: # %if
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:    kmovb (%ecx), %k0
+; X86-32-NEXT:    kshiftlb $6, %k0, %k1
+; X86-32-NEXT:    jmp .LBB8_3
+; X86-32-NEXT:  .LBB8_2: # %else
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:    kmovb (%ecx), %k1
+; X86-32-NEXT:  .LBB8_3: # %exit
+; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; X86-32-NEXT:    vmovaps %ymm1, (%eax)
+; X86-32-NEXT:    vzeroupper
+; X86-32-NEXT:    retl
 entry:
   br i1 %cond, label %if, label %else
 
@@ -318,24 +496,46 @@ exit:
 }
 
 define void @test_add(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
-; CHECK-LABEL: test_add:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
-; CHECK-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
-; CHECK-NEXT:    kmovb (%rsi), %k0
-; CHECK-NEXT:    kmovb (%rdx), %k1
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    je .LBB9_2
-; CHECK-NEXT:  # %bb.1: # %if
-; CHECK-NEXT:    kandb %k1, %k0, %k1
-; CHECK-NEXT:    jmp .LBB9_3
-; CHECK-NEXT:  .LBB9_2: # %else
-; CHECK-NEXT:    kaddb %k1, %k0, %k1
-; CHECK-NEXT:  .LBB9_3: # %exit
-; CHECK-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
-; CHECK-NEXT:    vmovaps %ymm1, (%rcx)
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
+; X86-64-LABEL: test_add:
+; X86-64:       # %bb.0: # %entry
+; X86-64-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; X86-64-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; X86-64-NEXT:    kmovb (%rsi), %k0
+; X86-64-NEXT:    kmovb (%rdx), %k1
+; X86-64-NEXT:    testb $1, %dil
+; X86-64-NEXT:    je .LBB9_2
+; X86-64-NEXT:  # %bb.1: # %if
+; X86-64-NEXT:    kandb %k1, %k0, %k1
+; X86-64-NEXT:    jmp .LBB9_3
+; X86-64-NEXT:  .LBB9_2: # %else
+; X86-64-NEXT:    kaddb %k1, %k0, %k1
+; X86-64-NEXT:  .LBB9_3: # %exit
+; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
+; X86-64-NEXT:    vzeroupper
+; X86-64-NEXT:    retq
+;
+; X86-32-LABEL: test_add:
+; X86-32:       # %bb.0: # %entry
+; X86-32-NEXT:    # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
+; X86-32-NEXT:    # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-32-NEXT:    kmovb (%edx), %k0
+; X86-32-NEXT:    kmovb (%ecx), %k1
+; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-32-NEXT:    je .LBB9_2
+; X86-32-NEXT:  # %bb.1: # %if
+; X86-32-NEXT:    kandb %k1, %k0, %k1
+; X86-32-NEXT:    jmp .LBB9_3
+; X86-32-NEXT:  .LBB9_2: # %else
+; X86-32-NEXT:    kaddb %k1, %k0, %k1
+; X86-32-NEXT:  .LBB9_3: # %exit
+; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; X86-32-NEXT:    vmovaps %ymm1, (%eax)
+; X86-32-NEXT:    vzeroupper
+; X86-32-NEXT:    retl
 entry:
   %loaded1 = load i8, i8* %ptr1
   %loaded2 = load i8, i8* %ptr2



