[llvm] 63fc849 - [X86] Add missing intrinsic test for aesdecwide128kl and aesdecwide256kl. Capture all output values in keylocker tests. NFC

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 2 18:34:41 PDT 2020


Author: Craig Topper
Date: 2020-10-02T18:16:26-07:00
New Revision: 63fc8499f305be192f0f9a62c43e0fcfdbdb7607

URL: https://github.com/llvm/llvm-project/commit/63fc8499f305be192f0f9a62c43e0fcfdbdb7607
DIFF: https://github.com/llvm/llvm-project/commit/63fc8499f305be192f0f9a62c43e0fcfdbdb7607.diff

LOG: [X86] Add missing intrinsic test for aesdecwide128kl and aesdecwide256kl. Capture all output values in keylocker tests. NFC

The aesdec/enc instructions produce a flag output and one or eight
xmm register outputs. The tests were not capturing the xmm outputs.

Also add nounwind to the tests to remove .cfi directives.
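
For reference, the updated tests keep the xmm results live by extracting
every field of the returned aggregate and storing it. A minimal sketch of
that capture pattern for a single-result intrinsic (%out is a hypothetical
destination pointer for the xmm value):

  %res = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %data, i8* %h)
  %xmm = extractvalue { i8, <2 x i64> } %res, 1   ; the xmm register output
  store <2 x i64> %xmm, <2 x i64>* %out           ; capture it so it is not dead
  %flag = extractvalue { i8, <2 x i64> } %res, 0  ; the ZF-derived success flag
  ret i8 %flag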

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/keylocker-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/keylocker-intrinsics.ll b/llvm/test/CodeGen/X86/keylocker-intrinsics.ll
index 472eed484a16..d577ffd12e08 100644
--- a/llvm/test/CodeGen/X86/keylocker-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/keylocker-intrinsics.ll
@@ -12,7 +12,9 @@ declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, i8*)
 declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, i8*)
 declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, i8*)
 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
 
 define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) {
 ; X64-LABEL: test_loadiwkey:
@@ -31,7 +33,7 @@ entry:
   ret void
 }
 
-define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5) {
+define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5) nounwind {
 ; X64-LABEL: test_encodekey128_u32:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
@@ -47,17 +49,9 @@ define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, <2 x i64>* nocaptu
 ; X32-LABEL: test_encodekey128_u32:
 ; X32:       # %bb.0: # %entry
 ; X32-NEXT:    pushl %ebp
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    pushl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 12
 ; X32-NEXT:    pushl %edi
-; X32-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 20
-; X32-NEXT:    .cfi_offset %esi, -20
-; X32-NEXT:    .cfi_offset %edi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
@@ -73,13 +67,9 @@ define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, <2 x i64>* nocaptu
 ; X32-NEXT:    vmovaps %xmm5, (%edx)
 ; X32-NEXT:    vmovaps %xmm6, (%ecx)
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NEXT:    popl %edi
-; X32-NEXT:    .cfi_def_cfa_offset 12
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebp
-; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 entry:
   %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key)
@@ -99,7 +89,7 @@ entry:
   ret i32 %7
 }
 
-define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5, <2 x i64>* nocapture readnone %h6) {
+define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5, <2 x i64>* nocapture readnone %h6) nounwind {
 ; X64-LABEL: test_encodekey256_u32:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
@@ -115,17 +105,9 @@ define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_
 ; X32-LABEL: test_encodekey256_u32:
 ; X32:       # %bb.0: # %entry
 ; X32-NEXT:    pushl %ebp
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    pushl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 12
 ; X32-NEXT:    pushl %edi
-; X32-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 20
-; X32-NEXT:    .cfi_offset %esi, -20
-; X32-NEXT:    .cfi_offset %edi, -16
-; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
@@ -141,13 +123,9 @@ define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_
 ; X32-NEXT:    vmovaps %xmm4, (%edx)
 ; X32-NEXT:    vmovaps %xmm5, (%ecx)
 ; X32-NEXT:    popl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NEXT:    popl %edi
-; X32-NEXT:    .cfi_def_cfa_offset 12
 ; X32-NEXT:    popl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 8
 ; X32-NEXT:    popl %ebp
-; X32-NEXT:    .cfi_def_cfa_offset 4
 ; X32-NEXT:    retl
 entry:
   %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi)
@@ -167,96 +145,126 @@ entry:
   ret i32 %7
 }
 
-define i8 @test_mm_aesenc128kl_u8(<2 x i64> %data, i8* %h) {
+define i8 @test_mm_aesenc128kl_u8(<2 x i64> %data, i8* %h, <2 x i64>* %out) {
 ; X64-LABEL: test_mm_aesenc128kl_u8:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    aesenc128kl (%rdi), %xmm0
 ; X64-NEXT:    sete %al
+; X64-NEXT:    movaps %xmm0, (%rsi)
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: test_mm_aesenc128kl_u8:
 ; X32:       # %bb.0: # %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    aesenc128kl (%eax), %xmm0
 ; X32-NEXT:    sete %al
+; X32-NEXT:    vmovaps %xmm0, (%ecx)
 ; X32-NEXT:    retl
 entry:
   %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %data, i8* %h)
-  %1 = extractvalue { i8, <2 x i64> } %0, 0
-  ret i8 %1
+  %1 = extractvalue { i8, <2 x i64> } %0, 1
+  store <2 x i64> %1, <2 x i64>* %out
+  %2 = extractvalue { i8, <2 x i64> } %0, 0
+  ret i8 %2
 }
 
-define i8 @test_mm_aesdec128kl_u8(<2 x i64> %data, i8* %h) {
+define i8 @test_mm_aesdec128kl_u8(<2 x i64> %data, i8* %h, <2 x i64>* %out) {
 ; X64-LABEL: test_mm_aesdec128kl_u8:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    aesdec128kl (%rdi), %xmm0
 ; X64-NEXT:    sete %al
+; X64-NEXT:    movaps %xmm0, (%rsi)
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: test_mm_aesdec128kl_u8:
 ; X32:       # %bb.0: # %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    aesdec128kl (%eax), %xmm0
 ; X32-NEXT:    sete %al
+; X32-NEXT:    vmovaps %xmm0, (%ecx)
 ; X32-NEXT:    retl
 entry:
   %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %data, i8* %h)
-  %1 = extractvalue { i8, <2 x i64> } %0, 0
-  ret i8 %1
+  %1 = extractvalue { i8, <2 x i64> } %0, 1
+  store <2 x i64> %1, <2 x i64>* %out
+  %2 = extractvalue { i8, <2 x i64> } %0, 0
+  ret i8 %2
 }
 
-define i8 @test_mm_aesenc256kl_u8(<2 x i64> %data, i8* %h) {
+define i8 @test_mm_aesenc256kl_u8(<2 x i64> %data, i8* %h, <2 x i64>* %out) {
 ; X64-LABEL: test_mm_aesenc256kl_u8:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    aesenc256kl (%rdi), %xmm0
 ; X64-NEXT:    sete %al
+; X64-NEXT:    movaps %xmm0, (%rsi)
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: test_mm_aesenc256kl_u8:
 ; X32:       # %bb.0: # %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    aesenc256kl (%eax), %xmm0
 ; X32-NEXT:    sete %al
+; X32-NEXT:    vmovaps %xmm0, (%ecx)
 ; X32-NEXT:    retl
 entry:
   %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %data, i8* %h)
-  %1 = extractvalue { i8, <2 x i64> } %0, 0
-  ret i8 %1
+  %1 = extractvalue { i8, <2 x i64> } %0, 1
+  store <2 x i64> %1, <2 x i64>* %out
+  %2 = extractvalue { i8, <2 x i64> } %0, 0
+  ret i8 %2
 }
 
-define i8 @test_mm_aesdec256kl_u8(<2 x i64> %data, i8* %h) {
+define i8 @test_mm_aesdec256kl_u8(<2 x i64> %data, i8* %h, <2 x i64>* %out) {
 ; X64-LABEL: test_mm_aesdec256kl_u8:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    aesdec256kl (%rdi), %xmm0
 ; X64-NEXT:    sete %al
+; X64-NEXT:    movaps %xmm0, (%rsi)
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: test_mm_aesdec256kl_u8:
 ; X32:       # %bb.0: # %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    aesdec256kl (%eax), %xmm0
 ; X32-NEXT:    sete %al
+; X32-NEXT:    vmovaps %xmm0, (%ecx)
 ; X32-NEXT:    retl
 entry:
   %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %data, i8* %h)
-  %1 = extractvalue { i8, <2 x i64> } %0, 0
-  ret i8 %1
+  %1 = extractvalue { i8, <2 x i64> } %0, 1
+  store <2 x i64> %1, <2 x i64>* %out
+  %2 = extractvalue { i8, <2 x i64> } %0, 0
+  ret i8 %2
 }
 
-define i8 @test_mm_aesencwide128kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7) {
+define i8 @test_mm_aesencwide128kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind {
 ; X64-LABEL: test_mm_aesencwide128kl_u8:
 ; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r11
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
 ; X64-NEXT:    aesencwide128kl (%rdi)
 ; X64-NEXT:    sete %al
+; X64-NEXT:    movaps %xmm0, (%rsi)
+; X64-NEXT:    movaps %xmm1, (%rdx)
+; X64-NEXT:    movaps %xmm2, (%rcx)
+; X64-NEXT:    movaps %xmm3, (%r8)
+; X64-NEXT:    movaps %xmm4, (%r9)
+; X64-NEXT:    movaps %xmm5, (%rbx)
+; X64-NEXT:    movaps %xmm6, (%r11)
+; X64-NEXT:    movaps %xmm7, (%r10)
+; X64-NEXT:    popq %rbx
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: test_mm_aesencwide128kl_u8:
 ; X32:       # %bb.0: # %entry
 ; X32-NEXT:    pushl %ebp
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
-; X32-NEXT:    .cfi_def_cfa_register %ebp
 ; X32-NEXT:    andl $-16, %esp
 ; X32-NEXT:    subl $16, %esp
 ; X32-NEXT:    vmovaps 24(%ebp), %xmm3
@@ -266,31 +274,147 @@ define i8 @test_mm_aesencwide128kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x
 ; X32-NEXT:    vmovaps 88(%ebp), %xmm7
 ; X32-NEXT:    movl 8(%ebp), %eax
 ; X32-NEXT:    aesencwide128kl (%eax)
+; X32-NEXT:    movl 104(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm0, (%eax)
+; X32-NEXT:    movl 108(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm1, (%eax)
+; X32-NEXT:    movl 112(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm2, (%eax)
+; X32-NEXT:    movl 116(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm3, (%eax)
+; X32-NEXT:    movl 120(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm4, (%eax)
+; X32-NEXT:    movl 124(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm5, (%eax)
+; X32-NEXT:    movl 128(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm6, (%eax)
+; X32-NEXT:    movl 132(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm7, (%eax)
 ; X32-NEXT:    sete %al
 ; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
-; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 entry:
   %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
-  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
-  ret i8 %1
+  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
+  store <2 x i64> %1, <2 x i64>* %out0
+  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
+  store <2 x i64> %2, <2 x i64>* %out1
+  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
+  store <2 x i64> %3, <2 x i64>* %out2
+  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
+  store <2 x i64> %4, <2 x i64>* %out3
+  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
+  store <2 x i64> %5, <2 x i64>* %out4
+  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
+  store <2 x i64> %6, <2 x i64>* %out5
+  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
+  store <2 x i64> %7, <2 x i64>* %out6
+  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
+  store <2 x i64> %8, <2 x i64>* %out7
+  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
+  ret i8 %9
+}
+
+define i8 @test_mm_aesdecwide128kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind {
+; X64-LABEL: test_mm_aesdecwide128kl_u8:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r11
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
+; X64-NEXT:    aesdecwide128kl (%rdi)
+; X64-NEXT:    sete %al
+; X64-NEXT:    movaps %xmm0, (%rsi)
+; X64-NEXT:    movaps %xmm1, (%rdx)
+; X64-NEXT:    movaps %xmm2, (%rcx)
+; X64-NEXT:    movaps %xmm3, (%r8)
+; X64-NEXT:    movaps %xmm4, (%r9)
+; X64-NEXT:    movaps %xmm5, (%rbx)
+; X64-NEXT:    movaps %xmm6, (%r11)
+; X64-NEXT:    movaps %xmm7, (%r10)
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_mm_aesdecwide128kl_u8:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    andl $-16, %esp
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    vmovaps 24(%ebp), %xmm3
+; X32-NEXT:    vmovaps 40(%ebp), %xmm4
+; X32-NEXT:    vmovaps 56(%ebp), %xmm5
+; X32-NEXT:    vmovaps 72(%ebp), %xmm6
+; X32-NEXT:    vmovaps 88(%ebp), %xmm7
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    aesdecwide128kl (%eax)
+; X32-NEXT:    movl 104(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm0, (%eax)
+; X32-NEXT:    movl 108(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm1, (%eax)
+; X32-NEXT:    movl 112(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm2, (%eax)
+; X32-NEXT:    movl 116(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm3, (%eax)
+; X32-NEXT:    movl 120(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm4, (%eax)
+; X32-NEXT:    movl 124(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm5, (%eax)
+; X32-NEXT:    movl 128(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm6, (%eax)
+; X32-NEXT:    movl 132(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm7, (%eax)
+; X32-NEXT:    sete %al
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
+entry:
+  %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
+  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
+  store <2 x i64> %1, <2 x i64>* %out0
+  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
+  store <2 x i64> %2, <2 x i64>* %out1
+  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
+  store <2 x i64> %3, <2 x i64>* %out2
+  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
+  store <2 x i64> %4, <2 x i64>* %out3
+  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
+  store <2 x i64> %5, <2 x i64>* %out4
+  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
+  store <2 x i64> %6, <2 x i64>* %out5
+  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
+  store <2 x i64> %7, <2 x i64>* %out6
+  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
+  store <2 x i64> %8, <2 x i64>* %out7
+  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
+  ret i8 %9
 }
 
-define i8 @test_mm_aesencwide256kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7) {
+define i8 @test_mm_aesencwide256kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind {
 ; X64-LABEL: test_mm_aesencwide256kl_u8:
 ; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r11
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
 ; X64-NEXT:    aesencwide256kl (%rdi)
 ; X64-NEXT:    sete %al
+; X64-NEXT:    movaps %xmm0, (%rsi)
+; X64-NEXT:    movaps %xmm1, (%rdx)
+; X64-NEXT:    movaps %xmm2, (%rcx)
+; X64-NEXT:    movaps %xmm3, (%r8)
+; X64-NEXT:    movaps %xmm4, (%r9)
+; X64-NEXT:    movaps %xmm5, (%rbx)
+; X64-NEXT:    movaps %xmm6, (%r11)
+; X64-NEXT:    movaps %xmm7, (%r10)
+; X64-NEXT:    popq %rbx
 ; X64-NEXT:    retq
 ;
 ; X32-LABEL: test_mm_aesencwide256kl_u8:
 ; X32:       # %bb.0: # %entry
 ; X32-NEXT:    pushl %ebp
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    .cfi_offset %ebp, -8
 ; X32-NEXT:    movl %esp, %ebp
-; X32-NEXT:    .cfi_def_cfa_register %ebp
 ; X32-NEXT:    andl $-16, %esp
 ; X32-NEXT:    subl $16, %esp
 ; X32-NEXT:    vmovaps 24(%ebp), %xmm3
@@ -300,13 +424,119 @@ define i8 @test_mm_aesencwide256kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x
 ; X32-NEXT:    vmovaps 88(%ebp), %xmm7
 ; X32-NEXT:    movl 8(%ebp), %eax
 ; X32-NEXT:    aesencwide256kl (%eax)
+; X32-NEXT:    movl 104(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm0, (%eax)
+; X32-NEXT:    movl 108(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm1, (%eax)
+; X32-NEXT:    movl 112(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm2, (%eax)
+; X32-NEXT:    movl 116(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm3, (%eax)
+; X32-NEXT:    movl 120(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm4, (%eax)
+; X32-NEXT:    movl 124(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm5, (%eax)
+; X32-NEXT:    movl 128(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm6, (%eax)
+; X32-NEXT:    movl 132(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm7, (%eax)
 ; X32-NEXT:    sete %al
 ; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
-; X32-NEXT:    .cfi_def_cfa %esp, 4
 ; X32-NEXT:    retl
 entry:
   %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
-  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
-  ret i8 %1
+  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
+  store <2 x i64> %1, <2 x i64>* %out0
+  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
+  store <2 x i64> %2, <2 x i64>* %out1
+  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
+  store <2 x i64> %3, <2 x i64>* %out2
+  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
+  store <2 x i64> %4, <2 x i64>* %out3
+  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
+  store <2 x i64> %5, <2 x i64>* %out4
+  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
+  store <2 x i64> %6, <2 x i64>* %out5
+  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
+  store <2 x i64> %7, <2 x i64>* %out6
+  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
+  store <2 x i64> %8, <2 x i64>* %out7
+  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
+  ret i8 %9
+}
+
+define i8 @test_mm_aesdecwide256kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind {
+; X64-LABEL: test_mm_aesdecwide256kl_u8:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r11
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
+; X64-NEXT:    aesdecwide256kl (%rdi)
+; X64-NEXT:    sete %al
+; X64-NEXT:    movaps %xmm0, (%rsi)
+; X64-NEXT:    movaps %xmm1, (%rdx)
+; X64-NEXT:    movaps %xmm2, (%rcx)
+; X64-NEXT:    movaps %xmm3, (%r8)
+; X64-NEXT:    movaps %xmm4, (%r9)
+; X64-NEXT:    movaps %xmm5, (%rbx)
+; X64-NEXT:    movaps %xmm6, (%r11)
+; X64-NEXT:    movaps %xmm7, (%r10)
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    retq
+;
+; X32-LABEL: test_mm_aesdecwide256kl_u8:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    andl $-16, %esp
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    vmovaps 24(%ebp), %xmm3
+; X32-NEXT:    vmovaps 40(%ebp), %xmm4
+; X32-NEXT:    vmovaps 56(%ebp), %xmm5
+; X32-NEXT:    vmovaps 72(%ebp), %xmm6
+; X32-NEXT:    vmovaps 88(%ebp), %xmm7
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    aesdecwide256kl (%eax)
+; X32-NEXT:    movl 104(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm0, (%eax)
+; X32-NEXT:    movl 108(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm1, (%eax)
+; X32-NEXT:    movl 112(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm2, (%eax)
+; X32-NEXT:    movl 116(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm3, (%eax)
+; X32-NEXT:    movl 120(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm4, (%eax)
+; X32-NEXT:    movl 124(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm5, (%eax)
+; X32-NEXT:    movl 128(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm6, (%eax)
+; X32-NEXT:    movl 132(%ebp), %eax
+; X32-NEXT:    vmovaps %xmm7, (%eax)
+; X32-NEXT:    sete %al
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
+entry:
+  %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
+  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
+  store <2 x i64> %1, <2 x i64>* %out0
+  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
+  store <2 x i64> %2, <2 x i64>* %out1
+  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
+  store <2 x i64> %3, <2 x i64>* %out2
+  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
+  store <2 x i64> %4, <2 x i64>* %out3
+  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
+  store <2 x i64> %5, <2 x i64>* %out4
+  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
+  store <2 x i64> %6, <2 x i64>* %out5
+  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
+  store <2 x i64> %7, <2 x i64>* %out6
+  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
+  store <2 x i64> %8, <2 x i64>* %out7
+  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
+  ret i8 %9
 }

