[llvm] a02b449 - [X86] Sync AESENC/DEC Key Locker builtins with gcc.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 4 12:10:38 PDT 2020
Author: Craig Topper
Date: 2020-10-04T12:09:41-07:00
New Revision: a02b449bb1556fe0f17b86eaa69f6bcda945d123
URL: https://github.com/llvm/llvm-project/commit/a02b449bb1556fe0f17b86eaa69f6bcda945d123
DIFF: https://github.com/llvm/llvm-project/commit/a02b449bb1556fe0f17b86eaa69f6bcda945d123.diff
LOG: [X86] Sync AESENC/DEC Key Locker builtins with gcc.
For the wide builtins, pass a single input and output pointer to
the builtins. Emit the GEPs and input loads from CGBuiltin.
Added:
Modified:
clang/include/clang/Basic/BuiltinsX86.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Headers/keylockerintrin.h
clang/test/CodeGen/X86/keylocker.c
llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index c33026139b3c..8f9cfe4b6dc5 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1902,22 +1902,16 @@ TARGET_BUILTIN(__builtin_ia32_enqcmds, "Ucv*vC*", "n", "enqcmd")
// KEY LOCKER
TARGET_BUILTIN(__builtin_ia32_loadiwkey, "vV2OiV2OiV2OiUi", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_encodekey128_u32,
- "UiUiV2Oiv*", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_encodekey256_u32,
- "UiUiV2OiV2Oiv*", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_aesenc128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_aesenc256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_aesdec128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_aesdec256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl")
-TARGET_BUILTIN(__builtin_ia32_aesencwide128kl,
- "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl")
-TARGET_BUILTIN(__builtin_ia32_aesencwide256kl,
- "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl")
-TARGET_BUILTIN(__builtin_ia32_aesdecwide128kl,
- "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl")
-TARGET_BUILTIN(__builtin_ia32_aesdecwide256kl,
- "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl")
+TARGET_BUILTIN(__builtin_ia32_encodekey128_u32, "UiUiV2Oiv*", "nV:128:", "kl")
+TARGET_BUILTIN(__builtin_ia32_encodekey256_u32, "UiUiV2OiV2Oiv*", "nV:128:", "kl")
+TARGET_BUILTIN(__builtin_ia32_aesenc128kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
+TARGET_BUILTIN(__builtin_ia32_aesenc256kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
+TARGET_BUILTIN(__builtin_ia32_aesdec128kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
+TARGET_BUILTIN(__builtin_ia32_aesdec256kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
+TARGET_BUILTIN(__builtin_ia32_aesencwide128kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
+TARGET_BUILTIN(__builtin_ia32_aesencwide256kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
+TARGET_BUILTIN(__builtin_ia32_aesdecwide128kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
+TARGET_BUILTIN(__builtin_ia32_aesdecwide256kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
// SERIALIZE
TARGET_BUILTIN(__builtin_ia32_serialize, "v", "n", "serialize")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d3603579844d..dc3cafa5d062 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14070,75 +14070,67 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateExtractValue(Call, 0);
}
- case X86::BI__builtin_ia32_aesenc128kl:
- case X86::BI__builtin_ia32_aesdec128kl:
- case X86::BI__builtin_ia32_aesenc256kl:
- case X86::BI__builtin_ia32_aesdec256kl:
- case X86::BI__builtin_ia32_aesencwide128kl:
- case X86::BI__builtin_ia32_aesdecwide128kl:
- case X86::BI__builtin_ia32_aesencwide256kl:
- case X86::BI__builtin_ia32_aesdecwide256kl: {
- int FirstReturnOp;
- int ResultCount;
- SmallVector<Value*, 9> InOps;
- unsigned ID;
-
+ case X86::BI__builtin_ia32_aesenc128kl_u8:
+ case X86::BI__builtin_ia32_aesdec128kl_u8:
+ case X86::BI__builtin_ia32_aesenc256kl_u8:
+ case X86::BI__builtin_ia32_aesdec256kl_u8: {
+ Intrinsic::ID IID;
switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_aesenc128kl:
- case X86::BI__builtin_ia32_aesdec128kl:
- case X86::BI__builtin_ia32_aesenc256kl:
- case X86::BI__builtin_ia32_aesdec256kl: {
- InOps = {Ops[1], Ops[2]};
- FirstReturnOp = 0;
- ResultCount = 1;
- switch (BuiltinID) {
- case X86::BI__builtin_ia32_aesenc128kl:
- ID = Intrinsic::x86_aesenc128kl;
- break;
- case X86::BI__builtin_ia32_aesdec128kl:
- ID = Intrinsic::x86_aesdec128kl;
- break;
- case X86::BI__builtin_ia32_aesenc256kl:
- ID = Intrinsic::x86_aesenc256kl;
- break;
- case X86::BI__builtin_ia32_aesdec256kl:
- ID = Intrinsic::x86_aesdec256kl;
- break;
- }
+ default: llvm_unreachable("Unexpected builtin");
+ case X86::BI__builtin_ia32_aesenc128kl_u8:
+ IID = Intrinsic::x86_aesenc128kl;
+ break;
+ case X86::BI__builtin_ia32_aesdec128kl_u8:
+ IID = Intrinsic::x86_aesdec128kl;
+ break;
+ case X86::BI__builtin_ia32_aesenc256kl_u8:
+ IID = Intrinsic::x86_aesenc256kl;
+ break;
+ case X86::BI__builtin_ia32_aesdec256kl_u8:
+ IID = Intrinsic::x86_aesdec256kl;
break;
}
- case X86::BI__builtin_ia32_aesencwide128kl:
- case X86::BI__builtin_ia32_aesdecwide128kl:
- case X86::BI__builtin_ia32_aesencwide256kl:
- case X86::BI__builtin_ia32_aesdecwide256kl: {
- InOps = {Ops[0], Ops[9], Ops[10], Ops[11], Ops[12], Ops[13],
- Ops[14], Ops[15], Ops[16]};
- FirstReturnOp = 1;
- ResultCount = 8;
- switch (BuiltinID) {
- case X86::BI__builtin_ia32_aesencwide128kl:
- ID = Intrinsic::x86_aesencwide128kl;
- break;
- case X86::BI__builtin_ia32_aesdecwide128kl:
- ID = Intrinsic::x86_aesdecwide128kl;
- break;
- case X86::BI__builtin_ia32_aesencwide256kl:
- ID = Intrinsic::x86_aesencwide256kl;
- break;
- case X86::BI__builtin_ia32_aesdecwide256kl:
- ID = Intrinsic::x86_aesdecwide256kl;
- break;
- }
+
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
+
+ Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
+ Ops[0]);
+
+ return Builder.CreateExtractValue(Call, 0);
+ }
+ case X86::BI__builtin_ia32_aesencwide128kl_u8:
+ case X86::BI__builtin_ia32_aesdecwide128kl_u8:
+ case X86::BI__builtin_ia32_aesencwide256kl_u8:
+ case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ case X86::BI__builtin_ia32_aesencwide128kl_u8:
+ IID = Intrinsic::x86_aesencwide128kl;
+ break;
+ case X86::BI__builtin_ia32_aesdecwide128kl_u8:
+ IID = Intrinsic::x86_aesdecwide128kl;
+ break;
+ case X86::BI__builtin_ia32_aesencwide256kl_u8:
+ IID = Intrinsic::x86_aesencwide256kl;
+ break;
+ case X86::BI__builtin_ia32_aesdecwide256kl_u8:
+ IID = Intrinsic::x86_aesdecwide256kl;
break;
}
+
+ Value *InOps[9];
+ InOps[0] = Ops[2];
+ for (int i = 0; i != 8; ++i) {
+ Value *Ptr = Builder.CreateConstGEP1_32(Ops[1], i);
+ InOps[i + 1] = Builder.CreateAlignedLoad(Ptr, Align(16));
}
- Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), InOps);
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
- for (int i = 0; i < ResultCount; ++i) {
- Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, i + 1),
- Ops[FirstReturnOp + i]);
+ for (int i = 0; i != 8; ++i) {
+ Value *Extract = Builder.CreateExtractValue(Call, i + 1);
+ Value *Ptr = Builder.CreateConstGEP1_32(Ops[0], i);
+ Builder.CreateAlignedStore(Extract, Ptr, Align(16));
}
return Builder.CreateExtractValue(Call, 0);
diff --git a/clang/lib/Headers/keylockerintrin.h b/clang/lib/Headers/keylockerintrin.h
index c31ba16122a5..c15d39c8e392 100644
--- a/clang/lib/Headers/keylockerintrin.h
+++ b/clang/lib/Headers/keylockerintrin.h
@@ -211,7 +211,7 @@ _mm_encodekey256_u32(unsigned int __htype, __m128i __key_lo, __m128i __key_hi,
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
- return __builtin_ia32_aesenc128kl(__odata, __idata, __h);
+ return __builtin_ia32_aesenc128kl_u8((__v2di *)__odata, (__v2di)__idata, __h);
}
/// The AESENC256KL performs 14 rounds of AES to encrypt the __idata using
@@ -248,7 +248,7 @@ _mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
- return __builtin_ia32_aesenc256kl(__odata, __idata, __h);
+ return __builtin_ia32_aesenc256kl_u8((__v2di *)__odata, (__v2di)__idata, __h);
}
/// The AESDEC128KL performs 10 rounds of AES to decrypt the __idata using
@@ -285,7 +285,7 @@ _mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
- return __builtin_ia32_aesdec128kl(__odata, __idata, __h);
+ return __builtin_ia32_aesdec128kl_u8((__v2di *)__odata, (__v2di)__idata, __h);
}
/// The AESDEC256KL performs 14 rounds of AES to decrypt the __idata using
@@ -322,7 +322,7 @@ _mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
- return __builtin_ia32_aesdec256kl(__odata, __idata, __h);
+ return __builtin_ia32_aesdec256kl_u8((__v2di *)__odata, (__v2di)__idata, __h);
}
#undef __DEFAULT_FN_ATTRS
@@ -374,23 +374,8 @@ _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) {
- return __builtin_ia32_aesencwide128kl(__h,
- __odata,
- __odata + 1,
- __odata + 2,
- __odata + 3,
- __odata + 4,
- __odata + 5,
- __odata + 6,
- __odata + 7,
- __idata[0],
- __idata[1],
- __idata[2],
- __idata[3],
- __idata[4],
- __idata[5],
- __idata[6],
- __idata[7]);
+ return __builtin_ia32_aesencwide128kl_u8((__v2di *)__odata,
+ (const __v2di *)__idata, __h);
}
/// Encrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle
@@ -429,23 +414,8 @@ _mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void*
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) {
- return __builtin_ia32_aesencwide256kl(__h,
- __odata,
- __odata + 1,
- __odata + 2,
- __odata + 3,
- __odata + 4,
- __odata + 5,
- __odata + 6,
- __odata + 7,
- __idata[0],
- __idata[1],
- __idata[2],
- __idata[3],
- __idata[4],
- __idata[5],
- __idata[6],
- __idata[7]);
+ return __builtin_ia32_aesencwide256kl_u8((__v2di *)__odata,
+ (const __v2di *)__idata, __h);
}
/// Decrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle
@@ -484,23 +454,8 @@ _mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void*
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) {
- return __builtin_ia32_aesdecwide128kl(__h,
- __odata,
- __odata + 1,
- __odata + 2,
- __odata + 3,
- __odata + 4,
- __odata + 5,
- __odata + 6,
- __odata + 7,
- __idata[0],
- __idata[1],
- __idata[2],
- __idata[3],
- __idata[4],
- __idata[5],
- __idata[6],
- __idata[7]);
+ return __builtin_ia32_aesdecwide128kl_u8((__v2di *)__odata,
+ (const __v2di *)__idata, __h);
}
/// Decrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle
@@ -539,23 +494,8 @@ _mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void*
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) {
- return __builtin_ia32_aesdecwide256kl(__h,
- __odata,
- __odata + 1,
- __odata + 2,
- __odata + 3,
- __odata + 4,
- __odata + 5,
- __odata + 6,
- __odata + 7,
- __idata[0],
- __idata[1],
- __idata[2],
- __idata[3],
- __idata[4],
- __idata[5],
- __idata[6],
- __idata[7]);
+ return __builtin_ia32_aesdecwide256kl_u8((__v2di *)__odata,
+ (const __v2di *)__idata, __h);
}
#undef __DEFAULT_FN_ATTRS
diff --git a/clang/test/CodeGen/X86/keylocker.c b/clang/test/CodeGen/X86/keylocker.c
index b410d53b4b83..b87fe22d7761 100644
--- a/clang/test/CodeGen/X86/keylocker.c
+++ b/clang/test/CodeGen/X86/keylocker.c
@@ -78,47 +78,215 @@ unsigned int test_encodekey256_u32(unsigned int htype, __m128i key_lo, __m128i k
unsigned char test_mm_aesenc256kl_u8(__m128i *odata, __m128i idata, const void *h) {
//CHECK-LABEL: @test_mm_aesenc256kl_u8
//CHECK: call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %{{.*}}, i8* %{{.*}})
+ //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0
return _mm_aesenc256kl_u8(odata, idata, h);
}
unsigned char test_mm_aesdec256kl_u8(__m128i *odata, __m128i idata, const void *h) {
//CHECK-LABEL: @test_mm_aesdec256kl_u8
//CHECK: call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %{{.*}}, i8* %{{.*}})
+ //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0
return _mm_aesdec256kl_u8(odata, idata, h);
}
unsigned char test_mm_aesenc128kl_u8(__m128i *odata, __m128i idata, const void *h) {
//CHECK-LABEL: @test_mm_aesenc128kl_u8
//CHECK: call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %{{.*}}, i8* %{{.*}})
+ //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0
return _mm_aesenc128kl_u8(odata, idata, h);
}
unsigned char test_mm_aesdec128kl_u8(__m128i *odata, __m128i idata, const void *h) {
//CHECK-LABEL: @test_mm_aesdec128kl_u8
//CHECK: call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %{{.*}}, i8* %{{.*}})
+ //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0
return _mm_aesdec128kl_u8(odata, idata, h);
}
unsigned char test__mm_aesencwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) {
//CHECK-LABEL: @test__mm_aesencwide128kl
- //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0
return _mm_aesencwide128kl_u8(odata, idata, h);
}
unsigned char test__mm_aesdecwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) {
//CHECK-LABEL: @test__mm_aesdecwide128kl
- //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0
return _mm_aesdecwide128kl_u8(odata, idata, h);
}
unsigned char test__mm_aesencwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) {
//CHECK-LABEL: @test__mm_aesencwide256kl
- //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0
return _mm_aesencwide256kl_u8(odata, idata, h);
}
unsigned char test__mm_aesdecwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) {
//CHECK-LABEL: @test__mm_aesdecwide256kl
- //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7
+ //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
+ //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8
+ //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7
+ //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
+ //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0
return _mm_aesdecwide256kl_u8(odata, idata, h);
}
diff --git a/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll
index b5518ec44dc2..a2443ffbc4e6 100644
--- a/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll
@@ -99,6 +99,346 @@ entry:
ret i32 %21
}
+define zeroext i8 @test_mm_aesenc256kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) {
+; CHECK-LABEL: test_mm_aesenc256kl_u8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: aesenc256kl (%rsi), %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movaps %xmm0, (%rdi)
+; CHECK-NEXT: retq
+entry:
+ %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %idata, i8* %h) #1
+ %1 = extractvalue { i8, <2 x i64> } %0, 1
+ store <2 x i64> %1, <2 x i64>* %odata, align 16
+ %2 = extractvalue { i8, <2 x i64> } %0, 0
+ ret i8 %2
+}
+
+define zeroext i8 @test_mm_aesdec256kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) {
+; CHECK-LABEL: test_mm_aesdec256kl_u8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: aesdec256kl (%rsi), %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movaps %xmm0, (%rdi)
+; CHECK-NEXT: retq
+entry:
+ %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %idata, i8* %h) #1
+ %1 = extractvalue { i8, <2 x i64> } %0, 1
+ store <2 x i64> %1, <2 x i64>* %odata, align 16
+ %2 = extractvalue { i8, <2 x i64> } %0, 0
+ ret i8 %2
+}
+
+define zeroext i8 @test_mm_aesenc128kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) {
+; CHECK-LABEL: test_mm_aesenc128kl_u8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: aesenc128kl (%rsi), %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movaps %xmm0, (%rdi)
+; CHECK-NEXT: retq
+entry:
+ %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %idata, i8* %h) #1
+ %1 = extractvalue { i8, <2 x i64> } %0, 1
+ store <2 x i64> %1, <2 x i64>* %odata, align 16
+ %2 = extractvalue { i8, <2 x i64> } %0, 0
+ ret i8 %2
+}
+
+define zeroext i8 @test_mm_aesdec128kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) {
+; CHECK-LABEL: test_mm_aesdec128kl_u8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: aesdec128kl (%rsi), %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movaps %xmm0, (%rdi)
+; CHECK-NEXT: retq
+entry:
+ %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %idata, i8* %h) #1
+ %1 = extractvalue { i8, <2 x i64> } %0, 1
+ store <2 x i64> %1, <2 x i64>* %odata, align 16
+ %2 = extractvalue { i8, <2 x i64> } %0, 0
+ ret i8 %2
+}
+
+define zeroext i8 @test__mm_aesencwide128kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) {
+; CHECK-LABEL: test__mm_aesencwide128kl_u8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps (%rsi), %xmm0
+; CHECK-NEXT: movaps 16(%rsi), %xmm1
+; CHECK-NEXT: movaps 32(%rsi), %xmm2
+; CHECK-NEXT: movaps 48(%rsi), %xmm3
+; CHECK-NEXT: movaps 64(%rsi), %xmm4
+; CHECK-NEXT: movaps 80(%rsi), %xmm5
+; CHECK-NEXT: movaps 96(%rsi), %xmm6
+; CHECK-NEXT: movaps 112(%rsi), %xmm7
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: aesencwide128kl (%rdx)
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movaps %xmm0, (%rdi)
+; CHECK-NEXT: movaps %xmm1, 16(%rdi)
+; CHECK-NEXT: movaps %xmm2, 32(%rdi)
+; CHECK-NEXT: movaps %xmm3, 48(%rdi)
+; CHECK-NEXT: movaps %xmm4, 64(%rdi)
+; CHECK-NEXT: movaps %xmm5, 80(%rdi)
+; CHECK-NEXT: movaps %xmm6, 96(%rdi)
+; CHECK-NEXT: movaps %xmm7, 112(%rdi)
+; CHECK-NEXT: retq
+entry:
+ %0 = load <2 x i64>, <2 x i64>* %idata, align 16
+ %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1
+ %2 = load <2 x i64>, <2 x i64>* %1, align 16
+ %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2
+ %4 = load <2 x i64>, <2 x i64>* %3, align 16
+ %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3
+ %6 = load <2 x i64>, <2 x i64>* %5, align 16
+ %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4
+ %8 = load <2 x i64>, <2 x i64>* %7, align 16
+ %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5
+ %10 = load <2 x i64>, <2 x i64>* %9, align 16
+ %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6
+ %12 = load <2 x i64>, <2 x i64>* %11, align 16
+ %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7
+ %14 = load <2 x i64>, <2 x i64>* %13, align 16
+ %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
+ %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
+ store <2 x i64> %16, <2 x i64>* %odata, align 16
+ %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
+ %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1
+ store <2 x i64> %17, <2 x i64>* %18, align 16
+ %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
+ %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2
+ store <2 x i64> %19, <2 x i64>* %20, align 16
+ %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
+ %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3
+ store <2 x i64> %21, <2 x i64>* %22, align 16
+ %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
+ %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4
+ store <2 x i64> %23, <2 x i64>* %24, align 16
+ %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
+ %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5
+ store <2 x i64> %25, <2 x i64>* %26, align 16
+ %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
+ %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6
+ store <2 x i64> %27, <2 x i64>* %28, align 16
+ %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
+ %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7
+ store <2 x i64> %29, <2 x i64>* %30, align 16
+ %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
+ ret i8 %31
+}
+
+define zeroext i8 @test__mm_aesdecwide128kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) {
+; CHECK-LABEL: test__mm_aesdecwide128kl_u8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps (%rsi), %xmm0
+; CHECK-NEXT: movaps 16(%rsi), %xmm1
+; CHECK-NEXT: movaps 32(%rsi), %xmm2
+; CHECK-NEXT: movaps 48(%rsi), %xmm3
+; CHECK-NEXT: movaps 64(%rsi), %xmm4
+; CHECK-NEXT: movaps 80(%rsi), %xmm5
+; CHECK-NEXT: movaps 96(%rsi), %xmm6
+; CHECK-NEXT: movaps 112(%rsi), %xmm7
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: aesdecwide128kl (%rdx)
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movaps %xmm0, (%rdi)
+; CHECK-NEXT: movaps %xmm1, 16(%rdi)
+; CHECK-NEXT: movaps %xmm2, 32(%rdi)
+; CHECK-NEXT: movaps %xmm3, 48(%rdi)
+; CHECK-NEXT: movaps %xmm4, 64(%rdi)
+; CHECK-NEXT: movaps %xmm5, 80(%rdi)
+; CHECK-NEXT: movaps %xmm6, 96(%rdi)
+; CHECK-NEXT: movaps %xmm7, 112(%rdi)
+; CHECK-NEXT: retq
+entry:
+ %0 = load <2 x i64>, <2 x i64>* %idata, align 16
+ %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1
+ %2 = load <2 x i64>, <2 x i64>* %1, align 16
+ %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2
+ %4 = load <2 x i64>, <2 x i64>* %3, align 16
+ %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3
+ %6 = load <2 x i64>, <2 x i64>* %5, align 16
+ %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4
+ %8 = load <2 x i64>, <2 x i64>* %7, align 16
+ %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5
+ %10 = load <2 x i64>, <2 x i64>* %9, align 16
+ %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6
+ %12 = load <2 x i64>, <2 x i64>* %11, align 16
+ %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7
+ %14 = load <2 x i64>, <2 x i64>* %13, align 16
+ %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
+ %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
+ store <2 x i64> %16, <2 x i64>* %odata, align 16
+ %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
+ %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1
+ store <2 x i64> %17, <2 x i64>* %18, align 16
+ %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
+ %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2
+ store <2 x i64> %19, <2 x i64>* %20, align 16
+ %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
+ %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3
+ store <2 x i64> %21, <2 x i64>* %22, align 16
+ %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
+ %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4
+ store <2 x i64> %23, <2 x i64>* %24, align 16
+ %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
+ %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5
+ store <2 x i64> %25, <2 x i64>* %26, align 16
+ %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
+ %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6
+ store <2 x i64> %27, <2 x i64>* %28, align 16
+ %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
+ %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7
+ store <2 x i64> %29, <2 x i64>* %30, align 16
+ %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
+ ret i8 %31
+}
+
+define zeroext i8 @test__mm_aesencwide256kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) {
+; CHECK-LABEL: test__mm_aesencwide256kl_u8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps (%rsi), %xmm0
+; CHECK-NEXT: movaps 16(%rsi), %xmm1
+; CHECK-NEXT: movaps 32(%rsi), %xmm2
+; CHECK-NEXT: movaps 48(%rsi), %xmm3
+; CHECK-NEXT: movaps 64(%rsi), %xmm4
+; CHECK-NEXT: movaps 80(%rsi), %xmm5
+; CHECK-NEXT: movaps 96(%rsi), %xmm6
+; CHECK-NEXT: movaps 112(%rsi), %xmm7
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: aesencwide256kl (%rdx)
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movaps %xmm0, (%rdi)
+; CHECK-NEXT: movaps %xmm1, 16(%rdi)
+; CHECK-NEXT: movaps %xmm2, 32(%rdi)
+; CHECK-NEXT: movaps %xmm3, 48(%rdi)
+; CHECK-NEXT: movaps %xmm4, 64(%rdi)
+; CHECK-NEXT: movaps %xmm5, 80(%rdi)
+; CHECK-NEXT: movaps %xmm6, 96(%rdi)
+; CHECK-NEXT: movaps %xmm7, 112(%rdi)
+; CHECK-NEXT: retq
+entry:
+ %0 = load <2 x i64>, <2 x i64>* %idata, align 16
+ %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1
+ %2 = load <2 x i64>, <2 x i64>* %1, align 16
+ %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2
+ %4 = load <2 x i64>, <2 x i64>* %3, align 16
+ %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3
+ %6 = load <2 x i64>, <2 x i64>* %5, align 16
+ %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4
+ %8 = load <2 x i64>, <2 x i64>* %7, align 16
+ %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5
+ %10 = load <2 x i64>, <2 x i64>* %9, align 16
+ %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6
+ %12 = load <2 x i64>, <2 x i64>* %11, align 16
+ %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7
+ %14 = load <2 x i64>, <2 x i64>* %13, align 16
+ %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
+ %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
+ store <2 x i64> %16, <2 x i64>* %odata, align 16
+ %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
+ %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1
+ store <2 x i64> %17, <2 x i64>* %18, align 16
+ %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
+ %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2
+ store <2 x i64> %19, <2 x i64>* %20, align 16
+ %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
+ %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3
+ store <2 x i64> %21, <2 x i64>* %22, align 16
+ %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
+ %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4
+ store <2 x i64> %23, <2 x i64>* %24, align 16
+ %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
+ %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5
+ store <2 x i64> %25, <2 x i64>* %26, align 16
+ %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
+ %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6
+ store <2 x i64> %27, <2 x i64>* %28, align 16
+ %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
+ %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7
+ store <2 x i64> %29, <2 x i64>* %30, align 16
+ %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
+ ret i8 %31
+}
+
+define zeroext i8 @test__mm_aesdecwide256kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) {
+; CHECK-LABEL: test__mm_aesdecwide256kl_u8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps (%rsi), %xmm0
+; CHECK-NEXT: movaps 16(%rsi), %xmm1
+; CHECK-NEXT: movaps 32(%rsi), %xmm2
+; CHECK-NEXT: movaps 48(%rsi), %xmm3
+; CHECK-NEXT: movaps 64(%rsi), %xmm4
+; CHECK-NEXT: movaps 80(%rsi), %xmm5
+; CHECK-NEXT: movaps 96(%rsi), %xmm6
+; CHECK-NEXT: movaps 112(%rsi), %xmm7
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: aesdecwide256kl (%rdx)
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movaps %xmm0, (%rdi)
+; CHECK-NEXT: movaps %xmm1, 16(%rdi)
+; CHECK-NEXT: movaps %xmm2, 32(%rdi)
+; CHECK-NEXT: movaps %xmm3, 48(%rdi)
+; CHECK-NEXT: movaps %xmm4, 64(%rdi)
+; CHECK-NEXT: movaps %xmm5, 80(%rdi)
+; CHECK-NEXT: movaps %xmm6, 96(%rdi)
+; CHECK-NEXT: movaps %xmm7, 112(%rdi)
+; CHECK-NEXT: retq
+entry:
+ %0 = load <2 x i64>, <2 x i64>* %idata, align 16
+ %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1
+ %2 = load <2 x i64>, <2 x i64>* %1, align 16
+ %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2
+ %4 = load <2 x i64>, <2 x i64>* %3, align 16
+ %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3
+ %6 = load <2 x i64>, <2 x i64>* %5, align 16
+ %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4
+ %8 = load <2 x i64>, <2 x i64>* %7, align 16
+ %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5
+ %10 = load <2 x i64>, <2 x i64>* %9, align 16
+ %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6
+ %12 = load <2 x i64>, <2 x i64>* %11, align 16
+ %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7
+ %14 = load <2 x i64>, <2 x i64>* %13, align 16
+ %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
+ %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
+ store <2 x i64> %16, <2 x i64>* %odata, align 16
+ %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
+ %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1
+ store <2 x i64> %17, <2 x i64>* %18, align 16
+ %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
+ %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2
+ store <2 x i64> %19, <2 x i64>* %20, align 16
+ %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
+ %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3
+ store <2 x i64> %21, <2 x i64>* %22, align 16
+ %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
+ %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4
+ store <2 x i64> %23, <2 x i64>* %24, align 16
+ %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
+ %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5
+ store <2 x i64> %25, <2 x i64>* %26, align 16
+ %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
+ %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6
+ store <2 x i64> %27, <2 x i64>* %28, align 16
+ %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
+ %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7
+ store <2 x i64> %29, <2 x i64>* %30, align 16
+ %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
+ ret i8 %31
+}
+
declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32)
declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>)
declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>)
+declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, i8*)
+declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, i8*)
+declare { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64>, i8*)
+declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, i8*)
+declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
More information about the llvm-commits mailing list