[clang] [CIR][X86] Add support for `aes` and `aeswide` builtins (PR #175892)
Haocong Lu via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 14 19:34:13 PST 2026
https://github.com/Luhaocong updated https://github.com/llvm/llvm-project/pull/175892
>From 390e03922b1800a62d486f09c7c77f1e66d5d1ea Mon Sep 17 00:00:00 2001
From: Haocong Lu <haocong.lu at witintech.com>
Date: Wed, 14 Jan 2026 14:12:11 +0800
Subject: [PATCH 1/3] [CIR][X86] Add support for `aes` and `aeswide` builtins
- Support CIR codegen for follow builtin: `aesenc`, `aesdec`,
`aesencwide` and `aesdecwide`
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 170 ++-
.../test/CIR/CodeGenBuiltins/X86/keylocker.c | 1113 +++++++++++++++++
2 files changed, 1280 insertions(+), 3 deletions(-)
create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/keylocker.c
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 1db44f2f97cb2..6e80a9ae7541b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -542,6 +542,123 @@ static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
return builder.createVecCompare(loc, pred, op0, op1);
}
+static mlir::Value emitX86Aes(CIRGenBuilderTy &builder, mlir::Location loc,
+ llvm::StringRef intrinsicName, mlir::Type retType,
+ SmallVectorImpl<mlir::Value> &ops) {
+ // Create return struct type and call intrinsic function.
+ mlir::Type vecType =
+ mlir::cast<cir::PointerType>(ops[0].getType()).getPointee();
+ cir::RecordType rstRecTy = builder.getAnonRecordTy({retType, vecType});
+ mlir::Value rstValueRec = emitIntrinsicCallOp(
+ builder, loc, intrinsicName, rstRecTy, mlir::ValueRange{ops[1], ops[2]});
+
+ // Extract the first return value and truncate it to 1 bit, then cast result
+ // to bool value.
+ mlir::Value flag =
+ cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index*/ 0);
+ mlir::Value flagBit0 = builder.createCast(loc, cir::CastKind::integral, flag,
+ builder.getUIntNTy(1));
+ mlir::Value succ = builder.createCast(loc, cir::CastKind::int_to_bool,
+ flagBit0, builder.getBoolTy());
+
+ // Extract the second return value, store it to output address if success.
+ mlir::Value out =
+ cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index*/ 1);
+ Address outAddr(ops[0], /*align*/ CharUnits::fromQuantity(16));
+ cir::IfOp::create(
+ builder, loc, succ, /*withElseRegion=*/true,
+ /*thenBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location) {
+ builder.createStore(loc, out, outAddr);
+ builder.createYield(loc);
+ },
+ /*elseBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location) {
+ mlir::Value zero = builder.getNullValue(vecType, loc);
+ builder.createStore(loc, zero, outAddr);
+ builder.createYield(loc);
+ });
+
+ return cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index*/ 0);
+}
+
+static mlir::Value emitX86Aeswide(CIRGenBuilderTy &builder, mlir::Location loc,
+ llvm::StringRef intrinsicName,
+ mlir::Type retType,
+ SmallVectorImpl<mlir::Value> &ops) {
+ mlir::Type rstType =
+ mlir::cast<cir::PointerType>(ops[0].getType()).getPointee();
+ mlir::Type vecType =
+ mlir::cast<cir::PointerType>(ops[1].getType()).getPointee();
+
+ // Create struct for return type and load input arguments, then call
+ // intrinsic function.
+ llvm::SmallVector<mlir::Type, 9> rstRec = {retType};
+ llvm::SmallVector<mlir::Value, 9> arguments = {ops[2]};
+ llvm::SmallVector<mlir::Value, 8> constIdx;
+ for (int i = 0; i < 8; i++) {
+ // Recording return vector type
+ rstRec.push_back(vecType);
+ // Loading each vector argument from input address.
+ cir::ConstantOp idx = builder.getUInt32(i, loc);
+ mlir::Value nextInElePtr =
+ builder.getArrayElement(loc, loc, ops[1], vecType, idx,
+ /*shouldDecay=*/false);
+ mlir::Value arg =
+ builder.createAlignedLoad(loc, vecType, nextInElePtr,
+ /*align*/ CharUnits::fromQuantity(16));
+ arguments.push_back(arg);
+ // Collect constant index.
+ constIdx.push_back(idx);
+ }
+ cir::RecordType rstRecTy = builder.getAnonRecordTy(rstRec);
+ mlir::Value rstValueRec =
+ emitIntrinsicCallOp(builder, loc, intrinsicName, rstRecTy, arguments);
+
+ // Extract the first return value and truncate it to 1 bit, then cast result
+ // to bool value.
+ mlir::Value flag =
+ cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index*/ 0);
+ mlir::Value flagBit0 = builder.createCast(loc, cir::CastKind::integral, flag,
+ builder.getUIntNTy(1));
+ mlir::Value succ = builder.createCast(loc, cir::CastKind::int_to_bool,
+ flagBit0, builder.getBoolTy());
+
+ // Extract other return values, store those to output address if success.
+ cir::IfOp::create(
+ builder, loc, succ, /*withElseRegion=*/true,
+ /*thenBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location) {
+ for (int i = 0; i < 8; i++) {
+ mlir::Value out =
+ cir::ExtractMemberOp::create(builder, loc, rstValueRec,
+ /*index*/ i + 1);
+ mlir::Value nextOutEleAddr =
+ builder.getArrayElement(loc, loc, ops[0], vecType, constIdx[i],
+ /*shouldDecay=*/false);
+ Address outAddr(nextOutEleAddr,
+ /*align*/ CharUnits::fromQuantity(16));
+ builder.createStore(loc, out, outAddr);
+ }
+ builder.createYield(loc);
+ },
+ /*elseBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location) {
+ mlir::Value zero = builder.getNullValue(vecType, loc);
+ for (int i = 0; i < 8; i++) {
+ mlir::Value nextOutEleAddr =
+ builder.getArrayElement(loc, loc, ops[0], vecType, constIdx[i],
+ /*shouldDecay=*/false);
+ Address outAddr(nextOutEleAddr,
+ /*align*/ CharUnits::fromQuantity(16));
+ builder.createStore(loc, zero, outAddr);
+ }
+ builder.createYield(loc);
+ });
+
+ return cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index*/ 0);
+}
+
std::optional<mlir::Value>
CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -2081,15 +2198,62 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
case X86::BI__readgsdword:
case X86::BI__readgsqword:
case X86::BI__builtin_ia32_encodekey128_u32:
- case X86::BI__builtin_ia32_encodekey256_u32:
+ case X86::BI__builtin_ia32_encodekey256_u32: {
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
+ }
case X86::BI__builtin_ia32_aesenc128kl_u8:
case X86::BI__builtin_ia32_aesdec128kl_u8:
case X86::BI__builtin_ia32_aesenc256kl_u8:
- case X86::BI__builtin_ia32_aesdec256kl_u8:
+ case X86::BI__builtin_ia32_aesdec256kl_u8: {
+ llvm::StringRef intrinsicName;
+ switch (builtinID) {
+ default:
+ llvm_unreachable("Unexpected builtin");
+ case X86::BI__builtin_ia32_aesenc128kl_u8:
+ intrinsicName = "x86.aesenc128kl";
+ break;
+ case X86::BI__builtin_ia32_aesdec128kl_u8:
+ intrinsicName = "x86.aesdec128kl";
+ break;
+ case X86::BI__builtin_ia32_aesenc256kl_u8:
+ intrinsicName = "x86.aesenc256kl";
+ break;
+ case X86::BI__builtin_ia32_aesdec256kl_u8:
+ intrinsicName = "x86.aesdec256kl";
+ break;
+ }
+
+ return emitX86Aes(builder, getLoc(expr->getExprLoc()), intrinsicName,
+ convertType(expr->getType()), ops);
+ }
case X86::BI__builtin_ia32_aesencwide128kl_u8:
case X86::BI__builtin_ia32_aesdecwide128kl_u8:
case X86::BI__builtin_ia32_aesencwide256kl_u8:
- case X86::BI__builtin_ia32_aesdecwide256kl_u8:
+ case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
+ llvm::StringRef intrinsicName;
+ switch (builtinID) {
+ default:
+ llvm_unreachable("Unexpected builtin");
+ case X86::BI__builtin_ia32_aesencwide128kl_u8:
+ intrinsicName = "x86.aesencwide128kl";
+ break;
+ case X86::BI__builtin_ia32_aesdecwide128kl_u8:
+ intrinsicName = "x86.aesdecwide128kl";
+ break;
+ case X86::BI__builtin_ia32_aesencwide256kl_u8:
+ intrinsicName = "x86.aesencwide256kl";
+ break;
+ case X86::BI__builtin_ia32_aesdecwide256kl_u8:
+ intrinsicName = "x86.aesdecwide256kl";
+ break;
+ }
+
+ return emitX86Aeswide(builder, getLoc(expr->getExprLoc()), intrinsicName,
+ convertType(expr->getType()), ops);
+ }
case X86::BI__builtin_ia32_vfcmaddcph512_mask:
case X86::BI__builtin_ia32_vfmaddcph512_mask:
case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/keylocker.c b/clang/test/CIR/CodeGenBuiltins/X86/keylocker.c
new file mode 100644
index 0000000000000..445349eaa768d
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/keylocker.c
@@ -0,0 +1,1113 @@
+// RUN: %clang_cc1 -x c -ffreestanding -triple x86_64-unknown-linux -target-feature +kl -target-feature +widekl -Wno-implicit-function-declaration -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -ffreestanding -triple x86_64-unknown-linux -target-feature +kl -target-feature +widekl -Wno-implicit-function-declaration -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -ffreestanding -triple x86_64-unknown-linux -target-feature +kl -target-feature +widekl -Wno-implicit-function-declaration -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -ffreestanding -triple x86_64-unknown-linux -Wno-implicit-function-declaration -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -ffreestanding -triple=x86_64-unknown-linux -target-feature +kl -target-feature +widekl -emit-llvm -Wall -Werror %s -o - | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -ffreestanding -triple=x86_64-unknown-linux -target-feature +kl -target-feature +widekl -emit-llvm -Wall -Werror %s -o - | FileCheck %s -check-prefix=OGCG
+
+// This test mimics clang/test/CodeGen/X86/keylocker.c, which eventually
+// CIR shall be able to support fully.
+
+#include <x86intrin.h>
+
+// CIR: !rec_anon_struct = !cir.record<struct {!u8i, !cir.vector<2 x !s64i>}>
+// CIR: !rec_anon_struct1 = !cir.record<struct {!u8i, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>}>
+
+unsigned char test_mm_aesenc256kl_u8(__m128i *odata, __m128i idata, const void *h) {
+ // CIR-LABEL: _mm_aesenc256kl_u8
+ // CIR: %[[RESULT:.+]] = cir.call_llvm_intrinsic "x86.aesenc256kl" %[[IDATA:.+]], %[[H:.+]] : (!cir.vector<2 x !s64i>, !cir.ptr<!void>) -> !rec_anon_struct
+ // CIR: %[[FLAG:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct -> !u8i
+ // CIR: %[[FLAG_BIT0:.+]] = cir.cast integral %[[FLAG]] : !u8i -> !cir.int<u, 1>
+ // CIR: %[[SUCC:.+]] = cir.cast int_to_bool %[[FLAG_BIT0]] : !cir.int<u, 1> -> !cir.bool
+ // CIR: %[[OUT:.+]] = cir.extract_member %[[RESULT]][1] : !rec_anon_struct -> !cir.vector<2 x !s64i>
+ // CIR: cir.if %[[SUCC]] {
+ // CIR: cir.store align(16) %[[OUT]], %[[ODATA_PTR:.+]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: } else {
+ // CIR: %[[NULL:.+]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
+ // CIR: cir.store align(16) %[[NULL]], %[[ODATA_PTR]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: }
+ // CIR: %[[FLAG1:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct -> !u8i
+ // CIR: cir.store %[[FLAG1]], %[[RET_ADDR:.+]] : !u8i, !cir.ptr<!u8i>
+ // CIR: %[[RET:.+]] = cir.load %[[RET_ADDR]] : !cir.ptr<!u8i>, !u8i
+ // CIR: cir.return %[[RET]] : !u8i
+
+ // LLVM-LABEL: _mm_aesenc256kl_u8
+ // LLVM: %[[RESULT:.+]] = call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %[[IDATA:.+]], ptr %[[H:.+]])
+ // LLVM: %[[FLAG:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // LLVM: %[[SUCC:.+]] = trunc i8 %[[FLAG]] to i1
+ // LLVM: %[[OUT:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 1
+ // LLVM: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // LLVM: [[NO_ERROR]]:
+ // LLVM: store <2 x i64> %[[OUT]], ptr %[[ODATA_PTR:.+]], align 16
+ // LLVM: br label %[[EXIT:.+]]
+ // LLVM: [[ERROR]]:
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // LLVM: br label %[[EXIT]]
+ // LLVM: [[EXIT]]:
+ // LLVM: %[[FLAG1:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // LLVM: store i8 %[[FLAG1]], ptr %[[FLAG1_ADDR:.+]], align 1
+ // LLVM: %[[FLAG2:.+]] = load i8, ptr %[[FLAG1_ADDR]], align 1
+ // LLVM: store i8 %[[FLAG2:.+]], ptr %[[FLAG2_ADDR:.+]], align 1
+ // LLVM: %[[RET:.+]] = load i8, ptr %[[FLAG2_ADDR]], align 1
+ // LLVM: ret i8 %[[RET]]
+
+ // OGCG-LABEL: test_mm_aesenc256kl_u8
+ // OGCG: %[[RESULT:.+]] = call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %[[IDATA:.+]], ptr %[[H:.+]])
+ // OGCG: %[[FLAG:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // OGCG: %[[SUCC:.+]] = trunc i8 %[[FLAG]] to i1
+ // OGCG: %[[OUT:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 1
+ // OGCG: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // OGCG: [[NO_ERROR]]:
+ // OGCG: store <2 x i64> %[[OUT]], ptr %[[ODATA_PTR:.+]], align 16
+ // OGCG: br label %[[EXIT:.+]]
+ // OGCG: [[ERROR]]:
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // OGCG: br label %[[EXIT]]
+ // OGCG: [[EXIT]]:
+ // OGCG: %[[RET:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // OGCG: ret i8 %[[RET]]
+ return _mm_aesenc256kl_u8(odata, idata, h);
+}
+
+unsigned char test_mm_aesdec256kl_u8(__m128i *odata, __m128i idata, const void *h) {
+ // CIR-LABEL: _mm_aesdec256kl_u8
+ // CIR: %[[RESULT:.+]] = cir.call_llvm_intrinsic "x86.aesdec256kl" %[[IDATA:.+]], %[[H:.+]] : (!cir.vector<2 x !s64i>, !cir.ptr<!void>) -> !rec_anon_struct
+ // CIR: %[[FLAG:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct -> !u8i
+ // CIR: %[[FLAG_BIT0:.+]] = cir.cast integral %[[FLAG]] : !u8i -> !cir.int<u, 1>
+ // CIR: %[[SUCC:.+]] = cir.cast int_to_bool %[[FLAG_BIT0]] : !cir.int<u, 1> -> !cir.bool
+ // CIR: %[[OUT:.+]] = cir.extract_member %[[RESULT]][1] : !rec_anon_struct -> !cir.vector<2 x !s64i>
+ // CIR: cir.if %[[SUCC]] {
+ // CIR: cir.store align(16) %[[OUT]], %[[ODATA_PTR:.+]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: } else {
+ // CIR: %[[NULL:.+]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
+ // CIR: cir.store align(16) %[[NULL]], %[[ODATA_PTR]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: }
+ // CIR: %[[FLAG1:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct -> !u8i
+ // CIR: cir.store %[[FLAG1]], %[[RET_ADDR:.+]] : !u8i, !cir.ptr<!u8i>
+ // CIR: %[[RET:.+]] = cir.load %[[RET_ADDR]] : !cir.ptr<!u8i>, !u8i
+ // CIR: cir.return %[[RET]] : !u8i
+
+ // LLVM-LABEL: _mm_aesdec256kl_u8
+ // LLVM: %[[RESULT:.+]] = call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %[[IDATA:.+]], ptr %[[H:.+]])
+ // LLVM: %[[FLAG:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // LLVM: %[[SUCC:.+]] = trunc i8 %[[FLAG]] to i1
+ // LLVM: %[[OUT:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 1
+ // LLVM: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // LLVM: [[NO_ERROR]]:
+ // LLVM: store <2 x i64> %[[OUT]], ptr %[[ODATA_PTR:.+]], align 16
+ // LLVM: br label %[[EXIT:.+]]
+ // LLVM: [[ERROR]]:
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // LLVM: br label %[[EXIT]]
+ // LLVM: [[EXIT]]:
+ // LLVM: %[[FLAG1:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // LLVM: store i8 %[[FLAG1]], ptr %[[FLAG1_ADDR:.+]], align 1
+ // LLVM: %[[FLAG2:.+]] = load i8, ptr %[[FLAG1_ADDR]], align 1
+ // LLVM: store i8 %[[FLAG2:.+]], ptr %[[FLAG2_ADDR:.+]], align 1
+ // LLVM: %[[RET:.+]] = load i8, ptr %[[FLAG2_ADDR]], align 1
+ // LLVM: ret i8 %[[RET]]
+
+ // OGCG-LABEL: test_mm_aesdec256kl_u8
+ // OGCG: %[[RESULT:.+]] = call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %[[IDATA:.+]], ptr %[[H:.+]])
+ // OGCG: %[[FLAG:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // OGCG: %[[SUCC:.+]] = trunc i8 %[[FLAG]] to i1
+ // OGCG: %[[OUT:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 1
+ // OGCG: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // OGCG: [[NO_ERROR]]:
+ // OGCG: store <2 x i64> %[[OUT]], ptr %[[ODATA_PTR:.+]], align 16
+ // OGCG: br label %[[EXIT:.+]]
+ // OGCG: [[ERROR]]:
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // OGCG: br label %[[EXIT]]
+ // OGCG: [[EXIT]]:
+ // OGCG: %[[RET:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // OGCG: ret i8 %[[RET]]
+ return _mm_aesdec256kl_u8(odata, idata, h);
+}
+
+unsigned char test_mm_aesenc128kl_u8(__m128i *odata, __m128i idata, const void *h) {
+ // CIR-LABEL: _mm_aesenc128kl_u8
+ // CIR: %[[RESULT:.+]] = cir.call_llvm_intrinsic "x86.aesenc128kl" %[[IDATA:.+]], %[[H:.+]] : (!cir.vector<2 x !s64i>, !cir.ptr<!void>) -> !rec_anon_struct
+ // CIR: %[[FLAG:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct -> !u8i
+ // CIR: %[[FLAG_BIT0:.+]] = cir.cast integral %[[FLAG]] : !u8i -> !cir.int<u, 1>
+ // CIR: %[[SUCC:.+]] = cir.cast int_to_bool %[[FLAG_BIT0]] : !cir.int<u, 1> -> !cir.bool
+ // CIR: %[[OUT:.+]] = cir.extract_member %[[RESULT]][1] : !rec_anon_struct -> !cir.vector<2 x !s64i>
+ // CIR: cir.if %[[SUCC]] {
+ // CIR: cir.store align(16) %[[OUT]], %[[ODATA_PTR:.+]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: } else {
+ // CIR: %[[NULL:.+]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
+ // CIR: cir.store align(16) %[[NULL]], %[[ODATA_PTR]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: }
+ // CIR: %[[FLAG1:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct -> !u8i
+ // CIR: cir.store %[[FLAG1]], %[[RET_ADDR:.+]] : !u8i, !cir.ptr<!u8i>
+ // CIR: %[[RET:.+]] = cir.load %[[RET_ADDR]] : !cir.ptr<!u8i>, !u8i
+ // CIR: cir.return %[[RET]] : !u8i
+
+ // LLVM-LABEL: _mm_aesenc128kl_u8
+ // LLVM: %[[RESULT:.+]] = call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %[[IDATA:.+]], ptr %[[H:.+]])
+ // LLVM: %[[FLAG:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // LLVM: %[[SUCC:.+]] = trunc i8 %[[FLAG]] to i1
+ // LLVM: %[[OUT:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 1
+ // LLVM: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // LLVM: [[NO_ERROR]]:
+ // LLVM: store <2 x i64> %[[OUT]], ptr %[[ODATA_PTR:.+]], align 16
+ // LLVM: br label %[[EXIT:.+]]
+ // LLVM: [[ERROR]]:
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // LLVM: br label %[[EXIT]]
+ // LLVM: [[EXIT]]:
+ // LLVM: %[[FLAG1:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // LLVM: store i8 %[[FLAG1]], ptr %[[FLAG1_ADDR:.+]], align 1
+ // LLVM: %[[FLAG2:.+]] = load i8, ptr %[[FLAG1_ADDR]], align 1
+ // LLVM: store i8 %[[FLAG2:.+]], ptr %[[FLAG2_ADDR:.+]], align 1
+ // LLVM: %[[RET:.+]] = load i8, ptr %[[FLAG2_ADDR]], align 1
+ // LLVM: ret i8 %[[RET]]
+
+ // OGCG-LABEL: test_mm_aesenc128kl_u8
+ // OGCG: %[[RESULT:.+]] = call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %[[IDATA:.+]], ptr %[[H:.+]])
+ // OGCG: %[[FLAG:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // OGCG: %[[SUCC:.+]] = trunc i8 %[[FLAG]] to i1
+ // OGCG: %[[OUT:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 1
+ // OGCG: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // OGCG: [[NO_ERROR]]:
+ // OGCG: store <2 x i64> %[[OUT]], ptr %[[ODATA_PTR:.+]], align 16
+ // OGCG: br label %[[EXIT:.+]]
+ // OGCG: [[ERROR]]:
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // OGCG: br label %[[EXIT]]
+ // OGCG: [[EXIT]]:
+ // OGCG: %[[RET:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // OGCG: ret i8 %[[RET]]
+ return _mm_aesenc128kl_u8(odata, idata, h);
+}
+
+unsigned char test_mm_aesdec128kl_u8(__m128i *odata, __m128i idata, const void *h) {
+ // CIR-LABEL: _mm_aesdec128kl_u8
+ // CIR: %[[RESULT:.+]] = cir.call_llvm_intrinsic "x86.aesdec128kl" %[[IDATA:.+]], %[[H:.+]] : (!cir.vector<2 x !s64i>, !cir.ptr<!void>) -> !rec_anon_struct
+ // CIR: %[[FLAG:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct -> !u8i
+ // CIR: %[[FLAG_BIT0:.+]] = cir.cast integral %[[FLAG]] : !u8i -> !cir.int<u, 1>
+ // CIR: %[[SUCC:.+]] = cir.cast int_to_bool %[[FLAG_BIT0]] : !cir.int<u, 1> -> !cir.bool
+ // CIR: %[[OUT:.+]] = cir.extract_member %[[RESULT]][1] : !rec_anon_struct -> !cir.vector<2 x !s64i>
+ // CIR: cir.if %[[SUCC]] {
+ // CIR: cir.store align(16) %[[OUT]], %[[ODATA_PTR:.+]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: } else {
+ // CIR: %[[NULL:.+]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
+ // CIR: cir.store align(16) %[[NULL]], %[[ODATA_PTR]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: }
+ // CIR: %[[FLAG1:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct -> !u8i
+ // CIR: cir.store %[[FLAG1]], %[[RET_ADDR:.+]] : !u8i, !cir.ptr<!u8i>
+ // CIR: %[[RET:.+]] = cir.load %[[RET_ADDR]] : !cir.ptr<!u8i>, !u8i
+ // CIR: cir.return %[[RET]] : !u8i
+
+ // LLVM-LABEL: _mm_aesdec128kl_u8
+ // LLVM: %[[RESULT:.+]] = call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %[[IDATA:.+]], ptr %[[H:.+]])
+ // LLVM: %[[FLAG:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // LLVM: %[[SUCC:.+]] = trunc i8 %[[FLAG]] to i1
+ // LLVM: %[[OUT:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 1
+ // LLVM: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // LLVM: [[NO_ERROR]]:
+ // LLVM: store <2 x i64> %[[OUT]], ptr %[[ODATA_PTR:.+]], align 16
+ // LLVM: br label %[[EXIT:.+]]
+ // LLVM: [[ERROR]]:
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // LLVM: br label %[[EXIT]]
+ // LLVM: [[EXIT]]:
+ // LLVM: %[[FLAG1:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // LLVM: store i8 %[[FLAG1]], ptr %[[FLAG1_ADDR:.+]], align 1
+ // LLVM: %[[FLAG2:.+]] = load i8, ptr %[[FLAG1_ADDR]], align 1
+ // LLVM: store i8 %[[FLAG2:.+]], ptr %[[FLAG2_ADDR:.+]], align 1
+ // LLVM: %[[RET:.+]] = load i8, ptr %[[FLAG2_ADDR]], align 1
+ // LLVM: ret i8 %[[RET]]
+
+ // OGCG-LABEL: test_mm_aesdec128kl_u8
+ // OGCG: %[[RESULT:.+]] = call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %[[IDATA:.+]], ptr %[[H:.+]])
+ // OGCG: %[[FLAG:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // OGCG: %[[SUCC:.+]] = trunc i8 %[[FLAG]] to i1
+ // OGCG: %[[OUT:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 1
+ // OGCG: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // OGCG: [[NO_ERROR]]:
+ // OGCG: store <2 x i64> %[[OUT]], ptr %[[ODATA_PTR:.+]], align 16
+ // OGCG: br label %[[EXIT:.+]]
+ // OGCG: [[ERROR]]:
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // OGCG: br label %[[EXIT]]
+ // OGCG: [[EXIT]]:
+ // OGCG: %[[RET:.+]] = extractvalue { i8, <2 x i64> } %[[RESULT]], 0
+ // OGCG: ret i8 %[[RET]]
+ return _mm_aesdec128kl_u8(odata, idata, h);
+}
+
+unsigned char test__mm_aesencwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) {
+ // CIR-LABEL: _mm_aesencwide256kl_u8
+ // CIR: %[[ZERO:.+]] = cir.const #cir.int<0> : !u32i
+ // CIR: %[[IN_ADDR0:.+]] = cir.ptr_stride %[[IDATA_ADDR:.+]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA0:.+]] = cir.load align(16) %[[IN_ADDR0]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[ONE:.+]] = cir.const #cir.int<1> : !u32i
+ // CIR: %[[IN_ADDR1:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA1:.+]] = cir.load align(16) %[[IN_ADDR1]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[TWO:.+]] = cir.const #cir.int<2> : !u32i
+ // CIR: %[[IN_ADDR2:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA2:.+]] = cir.load align(16) %[[IN_ADDR2]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[THREE:.+]] = cir.const #cir.int<3> : !u32i
+ // CIR: %[[IN_ADDR3:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA3:.+]] = cir.load align(16) %[[IN_ADDR3]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[FOUR:.+]] = cir.const #cir.int<4> : !u32i
+ // CIR: %[[IN_ADDR4:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA4:.+]] = cir.load align(16) %[[IN_ADDR4]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[FIVE:.+]] = cir.const #cir.int<5> : !u32i
+ // CIR: %[[IN_ADDR5:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA5:.+]] = cir.load align(16) %[[IN_ADDR5]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[SIX:.+]] = cir.const #cir.int<6> : !u32i
+ // CIR: %[[IN_ADDR6:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA6:.+]] = cir.load align(16) %[[IN_ADDR6]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[SEVEN:.+]] = cir.const #cir.int<7> : !u32i
+ // CIR: %[[IN_ADDR7:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA7:.+]] = cir.load align(16) %[[IN_ADDR7]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[RESULT:.+]] = cir.call_llvm_intrinsic "x86.aesencwide256kl" %[[H_ADDR:.+]], %[[IN_DATA0]], %[[IN_DATA1]], %[[IN_DATA2]], %[[IN_DATA3]], %[[IN_DATA4]], %[[IN_DATA5]], %[[IN_DATA6]], %[[IN_DATA7]] : (!cir.ptr<!void>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>) -> !rec_anon_struct1
+ // CIR: %[[FLAG:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct1 -> !u8i
+ // CIR: %[[FLAG_BIT0:.+]] = cir.cast integral %[[FLAG]] : !u8i -> !cir.int<u, 1>
+ // CIR: %[[SUCC:.+]] = cir.cast int_to_bool %[[FLAG_BIT0]] : !cir.int<u, 1> -> !cir.bool
+ // CIR: cir.if %[[SUCC]] {
+ // CIR: %[[OUT_DATA0:.+]] = cir.extract_member %[[RESULT]][1] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR0:.+]] = cir.ptr_stride %[[ODATA_ADDR:.+]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA0]], %[[OUT_ADDR0]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA1:.+]] = cir.extract_member %[[RESULT]][2] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA1]], %[[OUT_ADDR1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA2:.+]] = cir.extract_member %[[RESULT]][3] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR2:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA2]], %[[OUT_ADDR2]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA3:.+]] = cir.extract_member %[[RESULT]][4] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR3:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA3]], %[[OUT_ADDR3]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA4:.+]] = cir.extract_member %[[RESULT]][5] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR4:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA4]], %[[OUT_ADDR4]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA5:.+]] = cir.extract_member %[[RESULT]][6] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR5:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA5]], %[[OUT_ADDR5]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA6:.+]] = cir.extract_member %[[RESULT]][7] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR6:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA6]], %[[OUT_ADDR6]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA7:.+]] = cir.extract_member %[[RESULT]][8] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR7:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA7]], %[[OUT_ADDR7]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: } else {
+ // CIR: %[[NULL:.+]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR0_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR0_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR1_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR1_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR2_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR2_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR3_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR3_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR4_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR4_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR5_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR5_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR6_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR6_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR7_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR7_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: }
+ // CIR: %[[FLAG1:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct1 -> !u8i
+ // CIR: cir.store %[[FLAG1]], %[[RET_ADDR:.+]] : !u8i, !cir.ptr<!u8i>
+ // CIR: %[[RET:.+]] = cir.load %[[RET_ADDR]] : !cir.ptr<!u8i>, !u8i
+ // CIR: cir.return %[[RET]] : !u8i
+
+ // LLVM-LABEL: _mm_aesencwide256kl_u8
+ // LLVM: %[[IN_DATA0:.+]] = load <2 x i64>, ptr %[[IDATA_ADDR:.+]], align 16
+ // LLVM: %[[IN_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 1
+ // LLVM: %[[IN_DATA1:.+]] = load <2 x i64>, ptr %[[IN_ADDR1]], align 16
+ // LLVM: %[[IN_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 2
+ // LLVM: %[[IN_DATA2:.+]] = load <2 x i64>, ptr %[[IN_ADDR2]], align 16
+ // LLVM: %[[IN_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 3
+ // LLVM: %[[IN_DATA3:.+]] = load <2 x i64>, ptr %[[IN_ADDR3]], align 16
+ // LLVM: %[[IN_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 4
+ // LLVM: %[[IN_DATA4:.+]] = load <2 x i64>, ptr %[[IN_ADDR4]], align 16
+ // LLVM: %[[IN_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 5
+ // LLVM: %[[IN_DATA5:.+]] = load <2 x i64>, ptr %[[IN_ADDR5]], align 16
+ // LLVM: %[[IN_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 6
+ // LLVM: %[[IN_DATA6:.+]] = load <2 x i64>, ptr %[[IN_ADDR6]], align 16
+ // LLVM: %[[IN_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 7
+ // LLVM: %[[IN_DATA7:.+]] = load <2 x i64>, ptr %[[IN_ADDR7]], align 16
+ // LLVM: %[[RESULT:.+]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr %[[H_ADDR:.+]], <2 x i64> %[[IN_DATA0]], <2 x i64> %[[IN_DATA1]], <2 x i64> %[[IN_DATA2]], <2 x i64> %[[IN_DATA3]], <2 x i64> %[[IN_DATA4]], <2 x i64> %[[IN_DATA5]], <2 x i64> %[[IN_DATA6]], <2 x i64> %[[IN_DATA7]])
+ // LLVM: %[[FLAG:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // LLVM: %[[SUCC:.+]] = trunc i8 %34 to i1
+ // LLVM: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // LLVM: [[NO_ERROR]]:
+ // LLVM: %[[OUT_DATA0:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 1
+ // LLVM: store <2 x i64> %[[OUT_DATA0]], ptr %[[ODATA_PTR:.+]], align 16
+ // LLVM: %[[OUT_DATA1:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 2
+ // LLVM: %[[OUT_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 1
+ // LLVM: store <2 x i64> %[[OUT_DATA1]], ptr %[[OUT_ADDR1]], align 16
+ // LLVM: %[[OUT_DATA2:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 3
+ // LLVM: %[[OUT_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 2
+ // LLVM: store <2 x i64> %[[OUT_DATA2]], ptr %[[OUT_ADDR2]], align 16
+ // LLVM: %[[OUT_DATA3:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 4
+ // LLVM: %[[OUT_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 3
+ // LLVM: store <2 x i64> %[[OUT_DATA3]], ptr %[[OUT_ADDR3]], align 16
+ // LLVM: %[[OUT_DATA4:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 5
+ // LLVM: %[[OUT_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 4
+ // LLVM: store <2 x i64> %[[OUT_DATA4]], ptr %[[OUT_ADDR4]], align 16
+ // LLVM: %[[OUT_DATA5:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 6
+ // LLVM: %[[OUT_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 5
+ // LLVM: store <2 x i64> %[[OUT_DATA5]], ptr %[[OUT_ADDR5]], align 16
+ // LLVM: %[[OUT_DATA6:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 7
+ // LLVM: %[[OUT_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 6
+ // LLVM: store <2 x i64> %[[OUT_DATA6]], ptr %[[OUT_ADDR6]], align 16
+ // LLVM: %[[OUT_DATA7:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 8
+ // LLVM: %[[OUT_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 7
+ // LLVM: store <2 x i64> %[[OUT_DATA7]], ptr %[[OUT_ADDR7]], align 16
+ // LLVM: br label %[[EXIT:.+]]
+ // LLVM: [[ERROR]]:
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // LLVM: %[[OUT_ADDR0_1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 1
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_1]], align 16
+ // LLVM: %[[OUT_ADDR0_2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 2
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_2]], align 16
+ // LLVM: %[[OUT_ADDR0_3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 3
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_3]], align 16
+ // LLVM: %[[OUT_ADDR0_4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 4
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_4]], align 16
+ // LLVM: %[[OUT_ADDR0_5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 5
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_5]], align 16
+ // LLVM: %[[OUT_ADDR0_6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 6
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_6]], align 16
+ // LLVM: %[[OUT_ADDR0_7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 7
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_7]], align 16
+ // LLVM: br label %[[EXIT]]
+ // LLVM: [[EXIT]]:
+ // LLVM: %[[FLAG1:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // LLVM: store i8 %[[FLAG1]], ptr %[[FLAG1_ADDR:.+]], align 1
+ // LLVM: %[[FLAG2:.+]] = load i8, ptr %[[FLAG1_ADDR]], align 1
+ // LLVM: store i8 %[[FLAG2:.+]], ptr %[[FLAG2_ADDR:.+]], align 1
+ // LLVM: %[[RET:.+]] = load i8, ptr %[[FLAG2_ADDR]], align 1
+ // LLVM: ret i8 %[[RET]]
+
+ // OGCG-LABEL: _mm_aesencwide256kl_u8
+ // OGCG: %[[IN_DATA0:.+]] = load <2 x i64>, ptr %[[IDATA_ADDR:.+]], align 16
+ // OGCG: %[[IN_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 1
+ // OGCG: %[[IN_DATA1:.+]] = load <2 x i64>, ptr %[[IN_ADDR1]], align 16
+ // OGCG: %[[IN_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 2
+ // OGCG: %[[IN_DATA2:.+]] = load <2 x i64>, ptr %[[IN_ADDR2]], align 16
+ // OGCG: %[[IN_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 3
+ // OGCG: %[[IN_DATA3:.+]] = load <2 x i64>, ptr %[[IN_ADDR3]], align 16
+ // OGCG: %[[IN_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 4
+ // OGCG: %[[IN_DATA4:.+]] = load <2 x i64>, ptr %[[IN_ADDR4]], align 16
+ // OGCG: %[[IN_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 5
+ // OGCG: %[[IN_DATA5:.+]] = load <2 x i64>, ptr %[[IN_ADDR5]], align 16
+ // OGCG: %[[IN_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 6
+ // OGCG: %[[IN_DATA6:.+]] = load <2 x i64>, ptr %[[IN_ADDR6]], align 16
+ // OGCG: %[[IN_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 7
+ // OGCG: %[[IN_DATA7:.+]] = load <2 x i64>, ptr %[[IN_ADDR7]], align 16
+ // OGCG: %[[RESULT:.+]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr %[[H_ADDR:.+]], <2 x i64> %[[IN_DATA0]], <2 x i64> %[[IN_DATA1]], <2 x i64> %[[IN_DATA2]], <2 x i64> %[[IN_DATA3]], <2 x i64> %[[IN_DATA4]], <2 x i64> %[[IN_DATA5]], <2 x i64> %[[IN_DATA6]], <2 x i64> %[[IN_DATA7]])
+ // OGCG: %[[FLAG:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // OGCG: %[[SUCC:.+]] = trunc i8 %[[FLAG]] to i1
+ // OGCG: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // OGCG: [[NO_ERROR]]:
+ // OGCG: %[[OUT_DATA0:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 1
+ // OGCG: store <2 x i64> %[[OUT_DATA0]], ptr %[[ODATA_PTR:.+]], align 16
+ // OGCG: %[[OUT_DATA1:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 2
+ // OGCG: %[[OUT_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 1
+ // OGCG: store <2 x i64> %[[OUT_DATA1]], ptr %[[OUT_ADDR1]], align 16
+ // OGCG: %[[OUT_DATA2:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 3
+ // OGCG: %[[OUT_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 2
+ // OGCG: store <2 x i64> %[[OUT_DATA2]], ptr %[[OUT_ADDR2]], align 16
+ // OGCG: %[[OUT_DATA3:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 4
+ // OGCG: %[[OUT_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 3
+ // OGCG: store <2 x i64> %[[OUT_DATA3]], ptr %[[OUT_ADDR3]], align 16
+ // OGCG: %[[OUT_DATA4:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 5
+ // OGCG: %[[OUT_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 4
+ // OGCG: store <2 x i64> %[[OUT_DATA4]], ptr %[[OUT_ADDR4]], align 16
+ // OGCG: %[[OUT_DATA5:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 6
+ // OGCG: %[[OUT_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 5
+ // OGCG: store <2 x i64> %[[OUT_DATA5]], ptr %[[OUT_ADDR5]], align 16
+ // OGCG: %[[OUT_DATA6:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 7
+ // OGCG: %[[OUT_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 6
+ // OGCG: store <2 x i64> %[[OUT_DATA6]], ptr %[[OUT_ADDR6]], align 16
+ // OGCG: %[[OUT_DATA7:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 8
+ // OGCG: %[[OUT_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 7
+ // OGCG: store <2 x i64> %[[OUT_DATA7]], ptr %[[OUT_ADDR7]], align 16
+ // OGCG: br label %[[EXIT:.+]]
+ // OGCG: [[ERROR]]:
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // OGCG: %[[OUT_ADDR0_1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 1
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_1]], align 16
+ // OGCG: %[[OUT_ADDR0_2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 2
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_2]], align 16
+ // OGCG: %[[OUT_ADDR0_3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 3
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_3]], align 16
+ // OGCG: %[[OUT_ADDR0_4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 4
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_4]], align 16
+ // OGCG: %[[OUT_ADDR0_5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 5
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_5]], align 16
+ // OGCG: %[[OUT_ADDR0_6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 6
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_6]], align 16
+ // OGCG: %[[OUT_ADDR0_7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 7
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_7]], align 16
+ // OGCG: br label %[[EXIT]]
+ // OGCG: [[EXIT]]:
+ // OGCG: %[[RET:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // OGCG: ret i8 %[[RET]]
+ return _mm_aesencwide256kl_u8(odata, idata, h);
+}
+
+unsigned char test__mm_aesdecwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) {
+ // CIR-LABEL: _mm_aesdecwide256kl_u8
+ // CIR: %[[ZERO:.+]] = cir.const #cir.int<0> : !u32i
+ // CIR: %[[IN_ADDR0:.+]] = cir.ptr_stride %[[IDATA_ADDR:.+]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA0:.+]] = cir.load align(16) %[[IN_ADDR0]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[ONE:.+]] = cir.const #cir.int<1> : !u32i
+ // CIR: %[[IN_ADDR1:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA1:.+]] = cir.load align(16) %[[IN_ADDR1]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[TWO:.+]] = cir.const #cir.int<2> : !u32i
+ // CIR: %[[IN_ADDR2:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA2:.+]] = cir.load align(16) %[[IN_ADDR2]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[THREE:.+]] = cir.const #cir.int<3> : !u32i
+ // CIR: %[[IN_ADDR3:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA3:.+]] = cir.load align(16) %[[IN_ADDR3]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[FOUR:.+]] = cir.const #cir.int<4> : !u32i
+ // CIR: %[[IN_ADDR4:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA4:.+]] = cir.load align(16) %[[IN_ADDR4]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[FIVE:.+]] = cir.const #cir.int<5> : !u32i
+ // CIR: %[[IN_ADDR5:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA5:.+]] = cir.load align(16) %[[IN_ADDR5]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[SIX:.+]] = cir.const #cir.int<6> : !u32i
+ // CIR: %[[IN_ADDR6:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA6:.+]] = cir.load align(16) %[[IN_ADDR6]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[SEVEN:.+]] = cir.const #cir.int<7> : !u32i
+ // CIR: %[[IN_ADDR7:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA7:.+]] = cir.load align(16) %[[IN_ADDR7]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[RESULT:.+]] = cir.call_llvm_intrinsic "x86.aesdecwide256kl" %[[H_ADDR:.+]], %[[IN_DATA0]], %[[IN_DATA1]], %[[IN_DATA2]], %[[IN_DATA3]], %[[IN_DATA4]], %[[IN_DATA5]], %[[IN_DATA6]], %[[IN_DATA7]] : (!cir.ptr<!void>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>) -> !rec_anon_struct1
+ // CIR: %[[FLAG:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct1 -> !u8i
+ // CIR: %[[FLAG_BIT0:.+]] = cir.cast integral %[[FLAG]] : !u8i -> !cir.int<u, 1>
+ // CIR: %[[SUCC:.+]] = cir.cast int_to_bool %[[FLAG_BIT0]] : !cir.int<u, 1> -> !cir.bool
+ // CIR: cir.if %[[SUCC]] {
+ // CIR: %[[OUT_DATA0:.+]] = cir.extract_member %[[RESULT]][1] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR0:.+]] = cir.ptr_stride %[[ODATA_ADDR:.+]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA0]], %[[OUT_ADDR0]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA1:.+]] = cir.extract_member %[[RESULT]][2] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA1]], %[[OUT_ADDR1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA2:.+]] = cir.extract_member %[[RESULT]][3] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR2:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA2]], %[[OUT_ADDR2]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA3:.+]] = cir.extract_member %[[RESULT]][4] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR3:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA3]], %[[OUT_ADDR3]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA4:.+]] = cir.extract_member %[[RESULT]][5] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR4:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA4]], %[[OUT_ADDR4]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA5:.+]] = cir.extract_member %[[RESULT]][6] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR5:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA5]], %[[OUT_ADDR5]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA6:.+]] = cir.extract_member %[[RESULT]][7] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR6:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA6]], %[[OUT_ADDR6]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA7:.+]] = cir.extract_member %[[RESULT]][8] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR7:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA7]], %[[OUT_ADDR7]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: } else {
+ // CIR: %[[NULL:.+]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR0_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR0_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR1_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR1_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR2_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR2_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR3_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR3_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR4_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR4_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR5_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR5_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR6_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR6_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR7_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR7_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: }
+ // CIR: %[[FLAG1:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct1 -> !u8i
+ // CIR: cir.store %[[FLAG1]], %[[RET_ADDR:.+]] : !u8i, !cir.ptr<!u8i>
+ // CIR: %[[RET:.+]] = cir.load %[[RET_ADDR]] : !cir.ptr<!u8i>, !u8i
+ // CIR: cir.return %[[RET]] : !u8i
+
+ // LLVM-LABEL: _mm_aesdecwide256kl_u8
+ // LLVM: %[[IN_DATA0:.+]] = load <2 x i64>, ptr %[[IDATA_ADDR:.+]], align 16
+ // LLVM: %[[IN_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 1
+ // LLVM: %[[IN_DATA1:.+]] = load <2 x i64>, ptr %[[IN_ADDR1]], align 16
+ // LLVM: %[[IN_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 2
+ // LLVM: %[[IN_DATA2:.+]] = load <2 x i64>, ptr %[[IN_ADDR2]], align 16
+ // LLVM: %[[IN_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 3
+ // LLVM: %[[IN_DATA3:.+]] = load <2 x i64>, ptr %[[IN_ADDR3]], align 16
+ // LLVM: %[[IN_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 4
+ // LLVM: %[[IN_DATA4:.+]] = load <2 x i64>, ptr %[[IN_ADDR4]], align 16
+ // LLVM: %[[IN_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 5
+ // LLVM: %[[IN_DATA5:.+]] = load <2 x i64>, ptr %[[IN_ADDR5]], align 16
+ // LLVM: %[[IN_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 6
+ // LLVM: %[[IN_DATA6:.+]] = load <2 x i64>, ptr %[[IN_ADDR6]], align 16
+ // LLVM: %[[IN_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 7
+ // LLVM: %[[IN_DATA7:.+]] = load <2 x i64>, ptr %[[IN_ADDR7]], align 16
+ // LLVM: %[[RESULT:.+]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr %[[H_ADDR:.+]], <2 x i64> %[[IN_DATA0]], <2 x i64> %[[IN_DATA1]], <2 x i64> %[[IN_DATA2]], <2 x i64> %[[IN_DATA3]], <2 x i64> %[[IN_DATA4]], <2 x i64> %[[IN_DATA5]], <2 x i64> %[[IN_DATA6]], <2 x i64> %[[IN_DATA7]])
+ // LLVM: %[[FLAG:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // LLVM: %[[SUCC:.+]] = trunc i8 %34 to i1
+ // LLVM: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // LLVM: [[NO_ERROR]]:
+ // LLVM: %[[OUT_DATA0:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 1
+ // LLVM: store <2 x i64> %[[OUT_DATA0]], ptr %[[ODATA_PTR:.+]], align 16
+ // LLVM: %[[OUT_DATA1:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 2
+ // LLVM: %[[OUT_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 1
+ // LLVM: store <2 x i64> %[[OUT_DATA1]], ptr %[[OUT_ADDR1]], align 16
+ // LLVM: %[[OUT_DATA2:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 3
+ // LLVM: %[[OUT_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 2
+ // LLVM: store <2 x i64> %[[OUT_DATA2]], ptr %[[OUT_ADDR2]], align 16
+ // LLVM: %[[OUT_DATA3:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 4
+ // LLVM: %[[OUT_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 3
+ // LLVM: store <2 x i64> %[[OUT_DATA3]], ptr %[[OUT_ADDR3]], align 16
+ // LLVM: %[[OUT_DATA4:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 5
+ // LLVM: %[[OUT_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 4
+ // LLVM: store <2 x i64> %[[OUT_DATA4]], ptr %[[OUT_ADDR4]], align 16
+ // LLVM: %[[OUT_DATA5:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 6
+ // LLVM: %[[OUT_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 5
+ // LLVM: store <2 x i64> %[[OUT_DATA5]], ptr %[[OUT_ADDR5]], align 16
+ // LLVM: %[[OUT_DATA6:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 7
+ // LLVM: %[[OUT_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 6
+ // LLVM: store <2 x i64> %[[OUT_DATA6]], ptr %[[OUT_ADDR6]], align 16
+ // LLVM: %[[OUT_DATA7:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 8
+ // LLVM: %[[OUT_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 7
+ // LLVM: store <2 x i64> %[[OUT_DATA7]], ptr %[[OUT_ADDR7]], align 16
+ // LLVM: br label %[[EXIT:.+]]
+ // LLVM: [[ERROR]]:
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // LLVM: %[[OUT_ADDR0_1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 1
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_1]], align 16
+ // LLVM: %[[OUT_ADDR0_2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 2
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_2]], align 16
+ // LLVM: %[[OUT_ADDR0_3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 3
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_3]], align 16
+ // LLVM: %[[OUT_ADDR0_4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 4
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_4]], align 16
+ // LLVM: %[[OUT_ADDR0_5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 5
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_5]], align 16
+ // LLVM: %[[OUT_ADDR0_6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 6
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_6]], align 16
+ // LLVM: %[[OUT_ADDR0_7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 7
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_7]], align 16
+ // LLVM: br label %[[EXIT]]
+ // LLVM: [[EXIT]]:
+ // LLVM: %[[FLAG1:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // LLVM: store i8 %[[FLAG1]], ptr %[[FLAG1_ADDR:.+]], align 1
+ // LLVM: %[[FLAG2:.+]] = load i8, ptr %[[FLAG1_ADDR]], align 1
+ // LLVM: store i8 %[[FLAG2:.+]], ptr %[[FLAG2_ADDR:.+]], align 1
+ // LLVM: %[[RET:.+]] = load i8, ptr %[[FLAG2_ADDR]], align 1
+ // LLVM: ret i8 %[[RET]]
+
+ // OGCG-LABEL: _mm_aesdecwide256kl_u8
+ // OGCG: %[[IN_DATA0:.+]] = load <2 x i64>, ptr %[[IDATA_ADDR:.+]], align 16
+ // OGCG: %[[IN_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 1
+ // OGCG: %[[IN_DATA1:.+]] = load <2 x i64>, ptr %[[IN_ADDR1]], align 16
+ // OGCG: %[[IN_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 2
+ // OGCG: %[[IN_DATA2:.+]] = load <2 x i64>, ptr %[[IN_ADDR2]], align 16
+ // OGCG: %[[IN_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 3
+ // OGCG: %[[IN_DATA3:.+]] = load <2 x i64>, ptr %[[IN_ADDR3]], align 16
+ // OGCG: %[[IN_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 4
+ // OGCG: %[[IN_DATA4:.+]] = load <2 x i64>, ptr %[[IN_ADDR4]], align 16
+ // OGCG: %[[IN_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 5
+ // OGCG: %[[IN_DATA5:.+]] = load <2 x i64>, ptr %[[IN_ADDR5]], align 16
+ // OGCG: %[[IN_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 6
+ // OGCG: %[[IN_DATA6:.+]] = load <2 x i64>, ptr %[[IN_ADDR6]], align 16
+ // OGCG: %[[IN_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 7
+ // OGCG: %[[IN_DATA7:.+]] = load <2 x i64>, ptr %[[IN_ADDR7]], align 16
+ // OGCG: %[[RESULT:.+]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr %[[H_ADDR:.+]], <2 x i64> %[[IN_DATA0]], <2 x i64> %[[IN_DATA1]], <2 x i64> %[[IN_DATA2]], <2 x i64> %[[IN_DATA3]], <2 x i64> %[[IN_DATA4]], <2 x i64> %[[IN_DATA5]], <2 x i64> %[[IN_DATA6]], <2 x i64> %[[IN_DATA7]])
+ // OGCG: %[[FLAG:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // OGCG: %[[SUCC:.+]] = trunc i8 %[[FLAG]] to i1
+ // OGCG: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // OGCG: [[NO_ERROR]]:
+ // OGCG: %[[OUT_DATA0:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 1
+ // OGCG: store <2 x i64> %[[OUT_DATA0]], ptr %[[ODATA_PTR:.+]], align 16
+ // OGCG: %[[OUT_DATA1:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 2
+ // OGCG: %[[OUT_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 1
+ // OGCG: store <2 x i64> %[[OUT_DATA1]], ptr %[[OUT_ADDR1]], align 16
+ // OGCG: %[[OUT_DATA2:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 3
+ // OGCG: %[[OUT_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 2
+ // OGCG: store <2 x i64> %[[OUT_DATA2]], ptr %[[OUT_ADDR2]], align 16
+ // OGCG: %[[OUT_DATA3:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 4
+ // OGCG: %[[OUT_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 3
+ // OGCG: store <2 x i64> %[[OUT_DATA3]], ptr %[[OUT_ADDR3]], align 16
+ // OGCG: %[[OUT_DATA4:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 5
+ // OGCG: %[[OUT_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 4
+ // OGCG: store <2 x i64> %[[OUT_DATA4]], ptr %[[OUT_ADDR4]], align 16
+ // OGCG: %[[OUT_DATA5:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 6
+ // OGCG: %[[OUT_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 5
+ // OGCG: store <2 x i64> %[[OUT_DATA5]], ptr %[[OUT_ADDR5]], align 16
+ // OGCG: %[[OUT_DATA6:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 7
+ // OGCG: %[[OUT_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 6
+ // OGCG: store <2 x i64> %[[OUT_DATA6]], ptr %[[OUT_ADDR6]], align 16
+ // OGCG: %[[OUT_DATA7:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 8
+ // OGCG: %[[OUT_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 7
+ // OGCG: store <2 x i64> %[[OUT_DATA7]], ptr %[[OUT_ADDR7]], align 16
+ // OGCG: br label %[[EXIT:.+]]
+ // OGCG: [[ERROR]]:
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // OGCG: %[[OUT_ADDR0_1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 1
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_1]], align 16
+ // OGCG: %[[OUT_ADDR0_2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 2
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_2]], align 16
+ // OGCG: %[[OUT_ADDR0_3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 3
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_3]], align 16
+ // OGCG: %[[OUT_ADDR0_4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 4
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_4]], align 16
+ // OGCG: %[[OUT_ADDR0_5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 5
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_5]], align 16
+ // OGCG: %[[OUT_ADDR0_6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 6
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_6]], align 16
+ // OGCG: %[[OUT_ADDR0_7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 7
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_7]], align 16
+ // OGCG: br label %[[EXIT]]
+ // OGCG: [[EXIT]]:
+ // OGCG: %[[RET:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // OGCG: ret i8 %[[RET]]
+ return _mm_aesdecwide256kl_u8(odata, idata, h);
+}
+
+unsigned char test__mm_aesencwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) {
+ // CIR-LABEL: _mm_aesencwide128kl_u8
+ // CIR: %[[ZERO:.+]] = cir.const #cir.int<0> : !u32i
+ // CIR: %[[IN_ADDR0:.+]] = cir.ptr_stride %[[IDATA_ADDR:.+]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA0:.+]] = cir.load align(16) %[[IN_ADDR0]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[ONE:.+]] = cir.const #cir.int<1> : !u32i
+ // CIR: %[[IN_ADDR1:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA1:.+]] = cir.load align(16) %[[IN_ADDR1]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[TWO:.+]] = cir.const #cir.int<2> : !u32i
+ // CIR: %[[IN_ADDR2:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA2:.+]] = cir.load align(16) %[[IN_ADDR2]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[THREE:.+]] = cir.const #cir.int<3> : !u32i
+ // CIR: %[[IN_ADDR3:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA3:.+]] = cir.load align(16) %[[IN_ADDR3]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[FOUR:.+]] = cir.const #cir.int<4> : !u32i
+ // CIR: %[[IN_ADDR4:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA4:.+]] = cir.load align(16) %[[IN_ADDR4]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[FIVE:.+]] = cir.const #cir.int<5> : !u32i
+ // CIR: %[[IN_ADDR5:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA5:.+]] = cir.load align(16) %[[IN_ADDR5]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[SIX:.+]] = cir.const #cir.int<6> : !u32i
+ // CIR: %[[IN_ADDR6:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA6:.+]] = cir.load align(16) %[[IN_ADDR6]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[SEVEN:.+]] = cir.const #cir.int<7> : !u32i
+ // CIR: %[[IN_ADDR7:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA7:.+]] = cir.load align(16) %[[IN_ADDR7]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[RESULT:.+]] = cir.call_llvm_intrinsic "x86.aesencwide128kl" %[[H_ADDR:.+]], %[[IN_DATA0]], %[[IN_DATA1]], %[[IN_DATA2]], %[[IN_DATA3]], %[[IN_DATA4]], %[[IN_DATA5]], %[[IN_DATA6]], %[[IN_DATA7]] : (!cir.ptr<!void>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>) -> !rec_anon_struct1
+ // CIR: %[[FLAG:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct1 -> !u8i
+ // CIR: %[[FLAG_BIT0:.+]] = cir.cast integral %[[FLAG]] : !u8i -> !cir.int<u, 1>
+ // CIR: %[[SUCC:.+]] = cir.cast int_to_bool %[[FLAG_BIT0]] : !cir.int<u, 1> -> !cir.bool
+ // CIR: cir.if %[[SUCC]] {
+ // CIR: %[[OUT_DATA0:.+]] = cir.extract_member %[[RESULT]][1] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR0:.+]] = cir.ptr_stride %[[ODATA_ADDR:.+]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA0]], %[[OUT_ADDR0]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA1:.+]] = cir.extract_member %[[RESULT]][2] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA1]], %[[OUT_ADDR1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA2:.+]] = cir.extract_member %[[RESULT]][3] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR2:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA2]], %[[OUT_ADDR2]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA3:.+]] = cir.extract_member %[[RESULT]][4] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR3:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA3]], %[[OUT_ADDR3]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA4:.+]] = cir.extract_member %[[RESULT]][5] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR4:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA4]], %[[OUT_ADDR4]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA5:.+]] = cir.extract_member %[[RESULT]][6] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR5:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA5]], %[[OUT_ADDR5]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA6:.+]] = cir.extract_member %[[RESULT]][7] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR6:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA6]], %[[OUT_ADDR6]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA7:.+]] = cir.extract_member %[[RESULT]][8] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR7:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA7]], %[[OUT_ADDR7]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: } else {
+ // CIR: %[[NULL:.+]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR0_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR0_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR1_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR1_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR2_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR2_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR3_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR3_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR4_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR4_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR5_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR5_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR6_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR6_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR7_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR7_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: }
+ // CIR: %[[FLAG1:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct1 -> !u8i
+ // CIR: cir.store %[[FLAG1]], %[[RET_ADDR:.+]] : !u8i, !cir.ptr<!u8i>
+ // CIR: %[[RET:.+]] = cir.load %[[RET_ADDR]] : !cir.ptr<!u8i>, !u8i
+ // CIR: cir.return %[[RET]] : !u8i
+
+ // LLVM-LABEL: _mm_aesencwide128kl_u8
+ // LLVM: %[[IN_DATA0:.+]] = load <2 x i64>, ptr %[[IDATA_ADDR:.+]], align 16
+ // LLVM: %[[IN_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 1
+ // LLVM: %[[IN_DATA1:.+]] = load <2 x i64>, ptr %[[IN_ADDR1]], align 16
+ // LLVM: %[[IN_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 2
+ // LLVM: %[[IN_DATA2:.+]] = load <2 x i64>, ptr %[[IN_ADDR2]], align 16
+ // LLVM: %[[IN_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 3
+ // LLVM: %[[IN_DATA3:.+]] = load <2 x i64>, ptr %[[IN_ADDR3]], align 16
+ // LLVM: %[[IN_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 4
+ // LLVM: %[[IN_DATA4:.+]] = load <2 x i64>, ptr %[[IN_ADDR4]], align 16
+ // LLVM: %[[IN_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 5
+ // LLVM: %[[IN_DATA5:.+]] = load <2 x i64>, ptr %[[IN_ADDR5]], align 16
+ // LLVM: %[[IN_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 6
+ // LLVM: %[[IN_DATA6:.+]] = load <2 x i64>, ptr %[[IN_ADDR6]], align 16
+ // LLVM: %[[IN_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 7
+ // LLVM: %[[IN_DATA7:.+]] = load <2 x i64>, ptr %[[IN_ADDR7]], align 16
+ // LLVM: %[[RESULT:.+]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr %[[H_ADDR:.+]], <2 x i64> %[[IN_DATA0]], <2 x i64> %[[IN_DATA1]], <2 x i64> %[[IN_DATA2]], <2 x i64> %[[IN_DATA3]], <2 x i64> %[[IN_DATA4]], <2 x i64> %[[IN_DATA5]], <2 x i64> %[[IN_DATA6]], <2 x i64> %[[IN_DATA7]])
+ // LLVM: %[[FLAG:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // LLVM: %[[SUCC:.+]] = trunc i8 %34 to i1
+ // LLVM: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // LLVM: [[NO_ERROR]]:
+ // LLVM: %[[OUT_DATA0:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 1
+ // LLVM: store <2 x i64> %[[OUT_DATA0]], ptr %[[ODATA_PTR:.+]], align 16
+ // LLVM: %[[OUT_DATA1:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 2
+ // LLVM: %[[OUT_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 1
+ // LLVM: store <2 x i64> %[[OUT_DATA1]], ptr %[[OUT_ADDR1]], align 16
+ // LLVM: %[[OUT_DATA2:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 3
+ // LLVM: %[[OUT_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 2
+ // LLVM: store <2 x i64> %[[OUT_DATA2]], ptr %[[OUT_ADDR2]], align 16
+ // LLVM: %[[OUT_DATA3:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 4
+ // LLVM: %[[OUT_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 3
+ // LLVM: store <2 x i64> %[[OUT_DATA3]], ptr %[[OUT_ADDR3]], align 16
+ // LLVM: %[[OUT_DATA4:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 5
+ // LLVM: %[[OUT_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 4
+ // LLVM: store <2 x i64> %[[OUT_DATA4]], ptr %[[OUT_ADDR4]], align 16
+ // LLVM: %[[OUT_DATA5:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 6
+ // LLVM: %[[OUT_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 5
+ // LLVM: store <2 x i64> %[[OUT_DATA5]], ptr %[[OUT_ADDR5]], align 16
+ // LLVM: %[[OUT_DATA6:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 7
+ // LLVM: %[[OUT_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 6
+ // LLVM: store <2 x i64> %[[OUT_DATA6]], ptr %[[OUT_ADDR6]], align 16
+ // LLVM: %[[OUT_DATA7:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 8
+ // LLVM: %[[OUT_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 7
+ // LLVM: store <2 x i64> %[[OUT_DATA7]], ptr %[[OUT_ADDR7]], align 16
+ // LLVM: br label %[[EXIT:.+]]
+ // LLVM: [[ERROR]]:
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // LLVM: %[[OUT_ADDR0_1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 1
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_1]], align 16
+ // LLVM: %[[OUT_ADDR0_2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 2
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_2]], align 16
+ // LLVM: %[[OUT_ADDR0_3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 3
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_3]], align 16
+ // LLVM: %[[OUT_ADDR0_4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 4
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_4]], align 16
+ // LLVM: %[[OUT_ADDR0_5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 5
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_5]], align 16
+ // LLVM: %[[OUT_ADDR0_6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 6
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_6]], align 16
+ // LLVM: %[[OUT_ADDR0_7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 7
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_7]], align 16
+ // LLVM: br label %[[EXIT]]
+ // LLVM: [[EXIT]]:
+ // LLVM: %[[FLAG1:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // LLVM: store i8 %[[FLAG1]], ptr %[[FLAG1_ADDR:.+]], align 1
+ // LLVM: %[[FLAG2:.+]] = load i8, ptr %[[FLAG1_ADDR]], align 1
+ // LLVM: store i8 %[[FLAG2:.+]], ptr %[[FLAG2_ADDR:.+]], align 1
+ // LLVM: %[[RET:.+]] = load i8, ptr %[[FLAG2_ADDR]], align 1
+ // LLVM: ret i8 %[[RET]]
+
+ // OGCG-LABEL: _mm_aesencwide128kl_u8
+ // OGCG: %[[IN_DATA0:.+]] = load <2 x i64>, ptr %[[IDATA_ADDR:.+]], align 16
+ // OGCG: %[[IN_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 1
+ // OGCG: %[[IN_DATA1:.+]] = load <2 x i64>, ptr %[[IN_ADDR1]], align 16
+ // OGCG: %[[IN_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 2
+ // OGCG: %[[IN_DATA2:.+]] = load <2 x i64>, ptr %[[IN_ADDR2]], align 16
+ // OGCG: %[[IN_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 3
+ // OGCG: %[[IN_DATA3:.+]] = load <2 x i64>, ptr %[[IN_ADDR3]], align 16
+ // OGCG: %[[IN_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 4
+ // OGCG: %[[IN_DATA4:.+]] = load <2 x i64>, ptr %[[IN_ADDR4]], align 16
+ // OGCG: %[[IN_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 5
+ // OGCG: %[[IN_DATA5:.+]] = load <2 x i64>, ptr %[[IN_ADDR5]], align 16
+ // OGCG: %[[IN_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 6
+ // OGCG: %[[IN_DATA6:.+]] = load <2 x i64>, ptr %[[IN_ADDR6]], align 16
+ // OGCG: %[[IN_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 7
+ // OGCG: %[[IN_DATA7:.+]] = load <2 x i64>, ptr %[[IN_ADDR7]], align 16
+ // OGCG: %[[RESULT:.+]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr %[[H_ADDR:.+]], <2 x i64> %[[IN_DATA0]], <2 x i64> %[[IN_DATA1]], <2 x i64> %[[IN_DATA2]], <2 x i64> %[[IN_DATA3]], <2 x i64> %[[IN_DATA4]], <2 x i64> %[[IN_DATA5]], <2 x i64> %[[IN_DATA6]], <2 x i64> %[[IN_DATA7]])
+ // OGCG: %[[FLAG:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // OGCG: %[[SUCC:.+]] = trunc i8 %[[FLAG]] to i1
+ // OGCG: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // OGCG: [[NO_ERROR]]:
+ // OGCG: %[[OUT_DATA0:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 1
+ // OGCG: store <2 x i64> %[[OUT_DATA0]], ptr %[[ODATA_PTR:.+]], align 16
+ // OGCG: %[[OUT_DATA1:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 2
+ // OGCG: %[[OUT_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 1
+ // OGCG: store <2 x i64> %[[OUT_DATA1]], ptr %[[OUT_ADDR1]], align 16
+ // OGCG: %[[OUT_DATA2:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 3
+ // OGCG: %[[OUT_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 2
+ // OGCG: store <2 x i64> %[[OUT_DATA2]], ptr %[[OUT_ADDR2]], align 16
+ // OGCG: %[[OUT_DATA3:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 4
+ // OGCG: %[[OUT_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 3
+ // OGCG: store <2 x i64> %[[OUT_DATA3]], ptr %[[OUT_ADDR3]], align 16
+ // OGCG: %[[OUT_DATA4:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 5
+ // OGCG: %[[OUT_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 4
+ // OGCG: store <2 x i64> %[[OUT_DATA4]], ptr %[[OUT_ADDR4]], align 16
+ // OGCG: %[[OUT_DATA5:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 6
+ // OGCG: %[[OUT_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 5
+ // OGCG: store <2 x i64> %[[OUT_DATA5]], ptr %[[OUT_ADDR5]], align 16
+ // OGCG: %[[OUT_DATA6:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 7
+ // OGCG: %[[OUT_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 6
+ // OGCG: store <2 x i64> %[[OUT_DATA6]], ptr %[[OUT_ADDR6]], align 16
+ // OGCG: %[[OUT_DATA7:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 8
+ // OGCG: %[[OUT_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 7
+ // OGCG: store <2 x i64> %[[OUT_DATA7]], ptr %[[OUT_ADDR7]], align 16
+ // OGCG: br label %[[EXIT:.+]]
+ // OGCG: [[ERROR]]:
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // OGCG: %[[OUT_ADDR0_1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 1
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_1]], align 16
+ // OGCG: %[[OUT_ADDR0_2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 2
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_2]], align 16
+ // OGCG: %[[OUT_ADDR0_3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 3
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_3]], align 16
+ // OGCG: %[[OUT_ADDR0_4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 4
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_4]], align 16
+ // OGCG: %[[OUT_ADDR0_5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 5
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_5]], align 16
+ // OGCG: %[[OUT_ADDR0_6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 6
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_6]], align 16
+ // OGCG: %[[OUT_ADDR0_7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 7
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_7]], align 16
+ // OGCG: br label %[[EXIT]]
+ // OGCG: [[EXIT]]:
+ // OGCG: %[[RET:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // OGCG: ret i8 %[[RET]]
+ return _mm_aesencwide128kl_u8(odata, idata, h);
+}
+
+unsigned char test__mm_aesdecwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) {
+ // CIR-LABEL: _mm_aesdecwide128kl_u8
+ // CIR: %[[ZERO:.+]] = cir.const #cir.int<0> : !u32i
+ // CIR: %[[IN_ADDR0:.+]] = cir.ptr_stride %[[IDATA_ADDR:.+]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA0:.+]] = cir.load align(16) %[[IN_ADDR0]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[ONE:.+]] = cir.const #cir.int<1> : !u32i
+ // CIR: %[[IN_ADDR1:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA1:.+]] = cir.load align(16) %[[IN_ADDR1]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[TWO:.+]] = cir.const #cir.int<2> : !u32i
+ // CIR: %[[IN_ADDR2:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA2:.+]] = cir.load align(16) %[[IN_ADDR2]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[THREE:.+]] = cir.const #cir.int<3> : !u32i
+ // CIR: %[[IN_ADDR3:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA3:.+]] = cir.load align(16) %[[IN_ADDR3]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[FOUR:.+]] = cir.const #cir.int<4> : !u32i
+ // CIR: %[[IN_ADDR4:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA4:.+]] = cir.load align(16) %[[IN_ADDR4]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[FIVE:.+]] = cir.const #cir.int<5> : !u32i
+ // CIR: %[[IN_ADDR5:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA5:.+]] = cir.load align(16) %[[IN_ADDR5]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[SIX:.+]] = cir.const #cir.int<6> : !u32i
+ // CIR: %[[IN_ADDR6:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA6:.+]] = cir.load align(16) %[[IN_ADDR6]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[SEVEN:.+]] = cir.const #cir.int<7> : !u32i
+ // CIR: %[[IN_ADDR7:.+]] = cir.ptr_stride %[[IDATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[IN_DATA7:.+]] = cir.load align(16) %[[IN_ADDR7]] : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[RESULT:.+]] = cir.call_llvm_intrinsic "x86.aesdecwide128kl" %[[H_ADDR:.+]], %[[IN_DATA0]], %[[IN_DATA1]], %[[IN_DATA2]], %[[IN_DATA3]], %[[IN_DATA4]], %[[IN_DATA5]], %[[IN_DATA6]], %[[IN_DATA7]] : (!cir.ptr<!void>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>) -> !rec_anon_struct1
+ // CIR: %[[FLAG:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct1 -> !u8i
+ // CIR: %[[FLAG_BIT0:.+]] = cir.cast integral %[[FLAG]] : !u8i -> !cir.int<u, 1>
+ // CIR: %[[SUCC:.+]] = cir.cast int_to_bool %[[FLAG_BIT0]] : !cir.int<u, 1> -> !cir.bool
+ // CIR: cir.if %[[SUCC]] {
+ // CIR: %[[OUT_DATA0:.+]] = cir.extract_member %[[RESULT]][1] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR0:.+]] = cir.ptr_stride %[[ODATA_ADDR:.+]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA0]], %[[OUT_ADDR0]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA1:.+]] = cir.extract_member %[[RESULT]][2] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA1]], %[[OUT_ADDR1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA2:.+]] = cir.extract_member %[[RESULT]][3] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR2:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA2]], %[[OUT_ADDR2]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA3:.+]] = cir.extract_member %[[RESULT]][4] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR3:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA3]], %[[OUT_ADDR3]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA4:.+]] = cir.extract_member %[[RESULT]][5] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR4:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA4]], %[[OUT_ADDR4]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA5:.+]] = cir.extract_member %[[RESULT]][6] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR5:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA5]], %[[OUT_ADDR5]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA6:.+]] = cir.extract_member %[[RESULT]][7] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR6:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA6]], %[[OUT_ADDR6]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_DATA7:.+]] = cir.extract_member %[[RESULT]][8] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR7:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[OUT_DATA7]], %[[OUT_ADDR7]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: } else {
+ // CIR: %[[NULL:.+]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
+ // CIR: %[[OUT_ADDR0_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR0_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR1_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR1_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR2_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR2_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR3_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR3_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR4_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR4_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR5_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR5_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR6_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR6_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[OUT_ADDR7_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR7_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: }
+ // CIR: %[[FLAG1:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct1 -> !u8i
+ // CIR: cir.store %[[FLAG1]], %[[RET_ADDR:.+]] : !u8i, !cir.ptr<!u8i>
+ // CIR: %[[RET:.+]] = cir.load %[[RET_ADDR]] : !cir.ptr<!u8i>, !u8i
+ // CIR: cir.return %[[RET]] : !u8i
+
+ // LLVM-LABEL: _mm_aesdecwide128kl_u8
+ // LLVM: %[[IN_DATA0:.+]] = load <2 x i64>, ptr %[[IDATA_ADDR:.+]], align 16
+ // LLVM: %[[IN_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 1
+ // LLVM: %[[IN_DATA1:.+]] = load <2 x i64>, ptr %[[IN_ADDR1]], align 16
+ // LLVM: %[[IN_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 2
+ // LLVM: %[[IN_DATA2:.+]] = load <2 x i64>, ptr %[[IN_ADDR2]], align 16
+ // LLVM: %[[IN_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 3
+ // LLVM: %[[IN_DATA3:.+]] = load <2 x i64>, ptr %[[IN_ADDR3]], align 16
+ // LLVM: %[[IN_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 4
+ // LLVM: %[[IN_DATA4:.+]] = load <2 x i64>, ptr %[[IN_ADDR4]], align 16
+ // LLVM: %[[IN_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 5
+ // LLVM: %[[IN_DATA5:.+]] = load <2 x i64>, ptr %[[IN_ADDR5]], align 16
+ // LLVM: %[[IN_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 6
+ // LLVM: %[[IN_DATA6:.+]] = load <2 x i64>, ptr %[[IN_ADDR6]], align 16
+ // LLVM: %[[IN_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i64 7
+ // LLVM: %[[IN_DATA7:.+]] = load <2 x i64>, ptr %[[IN_ADDR7]], align 16
+ // LLVM: %[[RESULT:.+]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr %[[H_ADDR:.+]], <2 x i64> %[[IN_DATA0]], <2 x i64> %[[IN_DATA1]], <2 x i64> %[[IN_DATA2]], <2 x i64> %[[IN_DATA3]], <2 x i64> %[[IN_DATA4]], <2 x i64> %[[IN_DATA5]], <2 x i64> %[[IN_DATA6]], <2 x i64> %[[IN_DATA7]])
+ // LLVM: %[[FLAG:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // LLVM: %[[SUCC:.+]] = trunc i8 %34 to i1
+ // LLVM: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // LLVM: [[NO_ERROR]]:
+ // LLVM: %[[OUT_DATA0:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 1
+ // LLVM: store <2 x i64> %[[OUT_DATA0]], ptr %[[ODATA_PTR:.+]], align 16
+ // LLVM: %[[OUT_DATA1:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 2
+ // LLVM: %[[OUT_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 1
+ // LLVM: store <2 x i64> %[[OUT_DATA1]], ptr %[[OUT_ADDR1]], align 16
+ // LLVM: %[[OUT_DATA2:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 3
+ // LLVM: %[[OUT_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 2
+ // LLVM: store <2 x i64> %[[OUT_DATA2]], ptr %[[OUT_ADDR2]], align 16
+ // LLVM: %[[OUT_DATA3:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 4
+ // LLVM: %[[OUT_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 3
+ // LLVM: store <2 x i64> %[[OUT_DATA3]], ptr %[[OUT_ADDR3]], align 16
+ // LLVM: %[[OUT_DATA4:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 5
+ // LLVM: %[[OUT_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 4
+ // LLVM: store <2 x i64> %[[OUT_DATA4]], ptr %[[OUT_ADDR4]], align 16
+ // LLVM: %[[OUT_DATA5:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 6
+ // LLVM: %[[OUT_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 5
+ // LLVM: store <2 x i64> %[[OUT_DATA5]], ptr %[[OUT_ADDR5]], align 16
+ // LLVM: %[[OUT_DATA6:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 7
+ // LLVM: %[[OUT_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 6
+ // LLVM: store <2 x i64> %[[OUT_DATA6]], ptr %[[OUT_ADDR6]], align 16
+ // LLVM: %[[OUT_DATA7:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 8
+ // LLVM: %[[OUT_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 7
+ // LLVM: store <2 x i64> %[[OUT_DATA7]], ptr %[[OUT_ADDR7]], align 16
+ // LLVM: br label %[[EXIT:.+]]
+ // LLVM: [[ERROR]]:
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // LLVM: %[[OUT_ADDR0_1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 1
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_1]], align 16
+ // LLVM: %[[OUT_ADDR0_2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 2
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_2]], align 16
+ // LLVM: %[[OUT_ADDR0_3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 3
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_3]], align 16
+ // LLVM: %[[OUT_ADDR0_4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 4
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_4]], align 16
+ // LLVM: %[[OUT_ADDR0_5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 5
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_5]], align 16
+ // LLVM: %[[OUT_ADDR0_6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 6
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_6]], align 16
+ // LLVM: %[[OUT_ADDR0_7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i64 7
+ // LLVM: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_7]], align 16
+ // LLVM: br label %[[EXIT]]
+ // LLVM: [[EXIT]]:
+ // LLVM: %[[FLAG1:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // LLVM: store i8 %[[FLAG1]], ptr %[[FLAG1_ADDR:.+]], align 1
+ // LLVM: %[[FLAG2:.+]] = load i8, ptr %[[FLAG1_ADDR]], align 1
+ // LLVM: store i8 %[[FLAG2:.+]], ptr %[[FLAG2_ADDR:.+]], align 1
+ // LLVM: %[[RET:.+]] = load i8, ptr %[[FLAG2_ADDR]], align 1
+ // LLVM: ret i8 %[[RET]]
+
+ // OGCG-LABEL: _mm_aesdecwide128kl_u8
+ // OGCG: %[[IN_DATA0:.+]] = load <2 x i64>, ptr %[[IDATA_ADDR:.+]], align 16
+ // OGCG: %[[IN_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 1
+ // OGCG: %[[IN_DATA1:.+]] = load <2 x i64>, ptr %[[IN_ADDR1]], align 16
+ // OGCG: %[[IN_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 2
+ // OGCG: %[[IN_DATA2:.+]] = load <2 x i64>, ptr %[[IN_ADDR2]], align 16
+ // OGCG: %[[IN_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 3
+ // OGCG: %[[IN_DATA3:.+]] = load <2 x i64>, ptr %[[IN_ADDR3]], align 16
+ // OGCG: %[[IN_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 4
+ // OGCG: %[[IN_DATA4:.+]] = load <2 x i64>, ptr %[[IN_ADDR4]], align 16
+ // OGCG: %[[IN_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 5
+ // OGCG: %[[IN_DATA5:.+]] = load <2 x i64>, ptr %[[IN_ADDR5]], align 16
+ // OGCG: %[[IN_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 6
+ // OGCG: %[[IN_DATA6:.+]] = load <2 x i64>, ptr %[[IN_ADDR6]], align 16
+ // OGCG: %[[IN_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[IDATA_ADDR]], i32 7
+ // OGCG: %[[IN_DATA7:.+]] = load <2 x i64>, ptr %[[IN_ADDR7]], align 16
+ // OGCG: %[[RESULT:.+]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr %[[H_ADDR:.+]], <2 x i64> %[[IN_DATA0]], <2 x i64> %[[IN_DATA1]], <2 x i64> %[[IN_DATA2]], <2 x i64> %[[IN_DATA3]], <2 x i64> %[[IN_DATA4]], <2 x i64> %[[IN_DATA5]], <2 x i64> %[[IN_DATA6]], <2 x i64> %[[IN_DATA7]])
+ // OGCG: %[[FLAG:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // OGCG: %[[SUCC:.+]] = trunc i8 %[[FLAG]] to i1
+ // OGCG: br i1 %[[SUCC]], label %[[NO_ERROR:.+]], label %[[ERROR:.+]]
+ // OGCG: [[NO_ERROR]]:
+ // OGCG: %[[OUT_DATA0:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 1
+ // OGCG: store <2 x i64> %[[OUT_DATA0]], ptr %[[ODATA_PTR:.+]], align 16
+ // OGCG: %[[OUT_DATA1:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 2
+ // OGCG: %[[OUT_ADDR1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 1
+ // OGCG: store <2 x i64> %[[OUT_DATA1]], ptr %[[OUT_ADDR1]], align 16
+ // OGCG: %[[OUT_DATA2:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 3
+ // OGCG: %[[OUT_ADDR2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 2
+ // OGCG: store <2 x i64> %[[OUT_DATA2]], ptr %[[OUT_ADDR2]], align 16
+ // OGCG: %[[OUT_DATA3:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 4
+ // OGCG: %[[OUT_ADDR3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 3
+ // OGCG: store <2 x i64> %[[OUT_DATA3]], ptr %[[OUT_ADDR3]], align 16
+ // OGCG: %[[OUT_DATA4:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 5
+ // OGCG: %[[OUT_ADDR4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 4
+ // OGCG: store <2 x i64> %[[OUT_DATA4]], ptr %[[OUT_ADDR4]], align 16
+ // OGCG: %[[OUT_DATA5:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 6
+ // OGCG: %[[OUT_ADDR5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 5
+ // OGCG: store <2 x i64> %[[OUT_DATA5]], ptr %[[OUT_ADDR5]], align 16
+ // OGCG: %[[OUT_DATA6:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 7
+ // OGCG: %[[OUT_ADDR6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 6
+ // OGCG: store <2 x i64> %[[OUT_DATA6]], ptr %[[OUT_ADDR6]], align 16
+ // OGCG: %[[OUT_DATA7:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 8
+ // OGCG: %[[OUT_ADDR7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 7
+ // OGCG: store <2 x i64> %[[OUT_DATA7]], ptr %[[OUT_ADDR7]], align 16
+ // OGCG: br label %[[EXIT:.+]]
+ // OGCG: [[ERROR]]:
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[ODATA_PTR]], align 16
+ // OGCG: %[[OUT_ADDR0_1:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 1
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_1]], align 16
+ // OGCG: %[[OUT_ADDR0_2:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 2
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_2]], align 16
+ // OGCG: %[[OUT_ADDR0_3:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 3
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_3]], align 16
+ // OGCG: %[[OUT_ADDR0_4:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 4
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_4]], align 16
+ // OGCG: %[[OUT_ADDR0_5:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 5
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_5]], align 16
+ // OGCG: %[[OUT_ADDR0_6:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 6
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_6]], align 16
+ // OGCG: %[[OUT_ADDR0_7:.+]] = getelementptr <2 x i64>, ptr %[[ODATA_PTR]], i32 7
+ // OGCG: store <2 x i64> zeroinitializer, ptr %[[OUT_ADDR0_7]], align 16
+ // OGCG: br label %[[EXIT]]
+ // OGCG: [[EXIT]]:
+ // OGCG: %[[RET:.+]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %[[RESULT]], 0
+ // OGCG: ret i8 %[[RET]]
+ return _mm_aesdecwide128kl_u8(odata, idata, h);
+}
+
>From eb52633fe8eff6495905a976965f07bd72dbd0c4 Mon Sep 17 00:00:00 2001
From: Haocong Lu <haocong.lu at witintech.com>
Date: Wed, 14 Jan 2026 15:08:51 +0800
Subject: [PATCH 2/3] [CIR][X86] fix ci error
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 4 +---
clang/test/CIR/CodeGenBuiltins/X86/keylocker.c | 1 -
2 files changed, 1 insertion(+), 4 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 6e80a9ae7541b..100b60e5ae41f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -586,8 +586,6 @@ static mlir::Value emitX86Aeswide(CIRGenBuilderTy &builder, mlir::Location loc,
llvm::StringRef intrinsicName,
mlir::Type retType,
SmallVectorImpl<mlir::Value> &ops) {
- mlir::Type rstType =
- mlir::cast<cir::PointerType>(ops[0].getType()).getPointee();
mlir::Type vecType =
mlir::cast<cir::PointerType>(ops[1].getType()).getPointee();
@@ -608,7 +606,7 @@ static mlir::Value emitX86Aeswide(CIRGenBuilderTy &builder, mlir::Location loc,
builder.createAlignedLoad(loc, vecType, nextInElePtr,
/*align*/ CharUnits::fromQuantity(16));
arguments.push_back(arg);
- // Collect constant index.
+ // Cache constant index.
constIdx.push_back(idx);
}
cir::RecordType rstRecTy = builder.getAnonRecordTy(rstRec);
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/keylocker.c b/clang/test/CIR/CodeGenBuiltins/X86/keylocker.c
index 445349eaa768d..de4409c2e126d 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/keylocker.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/keylocker.c
@@ -1110,4 +1110,3 @@ unsigned char test__mm_aesdecwide128kl_u8(__m128i odata[8], const __m128i idata[
// OGCG: ret i8 %[[RET]]
return _mm_aesdecwide128kl_u8(odata, idata, h);
}
-
>From 34cbd9925eef88f6613aae538edc24be39a568bc Mon Sep 17 00:00:00 2001
From: Haocong Lu <haocong.lu at witintech.com>
Date: Thu, 15 Jan 2026 11:37:09 +0800
Subject: [PATCH 3/3] [CIR][X86] address comments
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 46 ++---
.../test/CIR/CodeGenBuiltins/X86/keylocker.c | 192 ++++++++++++------
2 files changed, 150 insertions(+), 88 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 100b60e5ae41f..4621479cbbc84 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -544,7 +544,7 @@ static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
static mlir::Value emitX86Aes(CIRGenBuilderTy &builder, mlir::Location loc,
llvm::StringRef intrinsicName, mlir::Type retType,
- SmallVectorImpl<mlir::Value> &ops) {
+ llvm::ArrayRef<mlir::Value> ops) {
// Create return struct type and call intrinsic function.
mlir::Type vecType =
mlir::cast<cir::PointerType>(ops[0].getType()).getPointee();
@@ -555,7 +555,7 @@ static mlir::Value emitX86Aes(CIRGenBuilderTy &builder, mlir::Location loc,
// Extract the first return value and truncate it to 1 bit, then cast result
// to bool value.
mlir::Value flag =
- cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index*/ 0);
+ cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index=*/0);
mlir::Value flagBit0 = builder.createCast(loc, cir::CastKind::integral, flag,
builder.getUIntNTy(1));
mlir::Value succ = builder.createCast(loc, cir::CastKind::int_to_bool,
@@ -563,8 +563,8 @@ static mlir::Value emitX86Aes(CIRGenBuilderTy &builder, mlir::Location loc,
// Extract the second return value, store it to output address if success.
mlir::Value out =
- cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index*/ 1);
- Address outAddr(ops[0], /*align*/ CharUnits::fromQuantity(16));
+ cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index=*/1);
+ Address outAddr(ops[0], /*align=*/CharUnits::fromQuantity(16));
cir::IfOp::create(
builder, loc, succ, /*withElseRegion=*/true,
/*thenBuilder=*/
@@ -579,44 +579,40 @@ static mlir::Value emitX86Aes(CIRGenBuilderTy &builder, mlir::Location loc,
builder.createYield(loc);
});
- return cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index*/ 0);
+ return cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index=*/0);
}
static mlir::Value emitX86Aeswide(CIRGenBuilderTy &builder, mlir::Location loc,
llvm::StringRef intrinsicName,
mlir::Type retType,
- SmallVectorImpl<mlir::Value> &ops) {
+ llvm::ArrayRef<mlir::Value> ops) {
mlir::Type vecType =
mlir::cast<cir::PointerType>(ops[1].getType()).getPointee();
// Create struct for return type and load input arguments, then call
// intrinsic function.
- llvm::SmallVector<mlir::Type, 9> rstRec = {retType};
- llvm::SmallVector<mlir::Value, 9> arguments = {ops[2]};
- llvm::SmallVector<mlir::Value, 8> constIdx;
+ mlir::Type recTypes[9] = {retType, vecType, vecType, vecType, vecType,
+ vecType, vecType, vecType, vecType};
+ mlir::Value arguments[9];
+ arguments[0] = ops[2];
for (int i = 0; i < 8; i++) {
- // Recording return vector type
- rstRec.push_back(vecType);
// Loading each vector argument from input address.
cir::ConstantOp idx = builder.getUInt32(i, loc);
mlir::Value nextInElePtr =
builder.getArrayElement(loc, loc, ops[1], vecType, idx,
/*shouldDecay=*/false);
- mlir::Value arg =
+ arguments[i + 1] =
builder.createAlignedLoad(loc, vecType, nextInElePtr,
- /*align*/ CharUnits::fromQuantity(16));
- arguments.push_back(arg);
- // Cache constant index.
- constIdx.push_back(idx);
+ /*align=*/CharUnits::fromQuantity(16));
}
- cir::RecordType rstRecTy = builder.getAnonRecordTy(rstRec);
+ cir::RecordType rstRecTy = builder.getAnonRecordTy(recTypes);
mlir::Value rstValueRec =
emitIntrinsicCallOp(builder, loc, intrinsicName, rstRecTy, arguments);
// Extract the first return value and truncate it to 1 bit, then cast result
// to bool value.
mlir::Value flag =
- cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index*/ 0);
+ cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index=*/0);
mlir::Value flagBit0 = builder.createCast(loc, cir::CastKind::integral, flag,
builder.getUIntNTy(1));
mlir::Value succ = builder.createCast(loc, cir::CastKind::int_to_bool,
@@ -630,12 +626,13 @@ static mlir::Value emitX86Aeswide(CIRGenBuilderTy &builder, mlir::Location loc,
for (int i = 0; i < 8; i++) {
mlir::Value out =
cir::ExtractMemberOp::create(builder, loc, rstValueRec,
- /*index*/ i + 1);
+ /*index=*/i + 1);
+ cir::ConstantOp idx = builder.getUInt32(i, loc);
mlir::Value nextOutEleAddr =
- builder.getArrayElement(loc, loc, ops[0], vecType, constIdx[i],
+ builder.getArrayElement(loc, loc, ops[0], vecType, idx,
/*shouldDecay=*/false);
Address outAddr(nextOutEleAddr,
- /*align*/ CharUnits::fromQuantity(16));
+ /*align=*/CharUnits::fromQuantity(16));
builder.createStore(loc, out, outAddr);
}
builder.createYield(loc);
@@ -644,17 +641,18 @@ static mlir::Value emitX86Aeswide(CIRGenBuilderTy &builder, mlir::Location loc,
[&](mlir::OpBuilder &b, mlir::Location) {
mlir::Value zero = builder.getNullValue(vecType, loc);
for (int i = 0; i < 8; i++) {
+ cir::ConstantOp idx = builder.getUInt32(i, loc);
mlir::Value nextOutEleAddr =
- builder.getArrayElement(loc, loc, ops[0], vecType, constIdx[i],
+ builder.getArrayElement(loc, loc, ops[0], vecType, idx,
/*shouldDecay=*/false);
Address outAddr(nextOutEleAddr,
- /*align*/ CharUnits::fromQuantity(16));
+ /*align=*/CharUnits::fromQuantity(16));
builder.createStore(loc, zero, outAddr);
}
builder.createYield(loc);
});
- return cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index*/ 0);
+ return cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index=*/0);
}
std::optional<mlir::Value>
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/keylocker.c b/clang/test/CIR/CodeGenBuiltins/X86/keylocker.c
index de4409c2e126d..a4995a108c8c8 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/keylocker.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/keylocker.c
@@ -275,46 +275,62 @@ unsigned char test__mm_aesencwide256kl_u8(__m128i odata[8], const __m128i idata[
// CIR: %[[SUCC:.+]] = cir.cast int_to_bool %[[FLAG_BIT0]] : !cir.int<u, 1> -> !cir.bool
// CIR: cir.if %[[SUCC]] {
// CIR: %[[OUT_DATA0:.+]] = cir.extract_member %[[RESULT]][1] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR0:.+]] = cir.ptr_stride %[[ODATA_ADDR:.+]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ZERO_1:.+]] = cir.const #cir.int<0> : !u32i
+ // CIR: %[[OUT_ADDR0:.+]] = cir.ptr_stride %[[ODATA_ADDR:.+]], %[[ZERO_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA0]], %[[OUT_ADDR0]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA1:.+]] = cir.extract_member %[[RESULT]][2] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ONE_1:.+]] = cir.const #cir.int<1> : !u32i
+ // CIR: %[[OUT_ADDR1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA1]], %[[OUT_ADDR1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA2:.+]] = cir.extract_member %[[RESULT]][3] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR2:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[TWO_1:.+]] = cir.const #cir.int<2> : !u32i
+ // CIR: %[[OUT_ADDR2:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA2]], %[[OUT_ADDR2]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA3:.+]] = cir.extract_member %[[RESULT]][4] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR3:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[THREE_1:.+]] = cir.const #cir.int<3> : !u32i
+ // CIR: %[[OUT_ADDR3:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA3]], %[[OUT_ADDR3]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA4:.+]] = cir.extract_member %[[RESULT]][5] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR4:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FOUR_1:.+]] = cir.const #cir.int<4> : !u32i
+ // CIR: %[[OUT_ADDR4:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA4]], %[[OUT_ADDR4]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA5:.+]] = cir.extract_member %[[RESULT]][6] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR5:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FIVE_1:.+]] = cir.const #cir.int<5> : !u32i
+ // CIR: %[[OUT_ADDR5:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA5]], %[[OUT_ADDR5]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA6:.+]] = cir.extract_member %[[RESULT]][7] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR6:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SIX_1:.+]] = cir.const #cir.int<6> : !u32i
+ // CIR: %[[OUT_ADDR6:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA6]], %[[OUT_ADDR6]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA7:.+]] = cir.extract_member %[[RESULT]][8] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR7:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SEVEN_1:.+]] = cir.const #cir.int<7> : !u32i
+ // CIR: %[[OUT_ADDR7:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA7]], %[[OUT_ADDR7]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: } else {
// CIR: %[[NULL:.+]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR0_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ZERO_2:.+]] = cir.const #cir.int<0> : !u32i
+ // CIR: %[[OUT_ADDR0_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ZERO_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR0_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR1_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ONE_2:.+]] = cir.const #cir.int<1> : !u32i
+ // CIR: %[[OUT_ADDR1_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR1_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR2_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[TWO_2:.+]] = cir.const #cir.int<2> : !u32i
+ // CIR: %[[OUT_ADDR2_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR2_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR3_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[THREE_2:.+]] = cir.const #cir.int<3> : !u32i
+ // CIR: %[[OUT_ADDR3_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR3_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR4_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FOUR_2:.+]] = cir.const #cir.int<4> : !u32i
+ // CIR: %[[OUT_ADDR4_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR4_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR5_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FIVE_2:.+]] = cir.const #cir.int<5> : !u32i
+ // CIR: %[[OUT_ADDR5_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR5_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR6_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SIX_2:.+]] = cir.const #cir.int<6> : !u32i
+ // CIR: %[[OUT_ADDR6_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR6_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR7_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SEVEN_2:.+]] = cir.const #cir.int<7> : !u32i
+ // CIR: %[[OUT_ADDR7_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR7_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: }
// CIR: %[[FLAG1:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct1 -> !u8i
@@ -492,46 +508,62 @@ unsigned char test__mm_aesdecwide256kl_u8(__m128i odata[8], const __m128i idata[
// CIR: %[[SUCC:.+]] = cir.cast int_to_bool %[[FLAG_BIT0]] : !cir.int<u, 1> -> !cir.bool
// CIR: cir.if %[[SUCC]] {
// CIR: %[[OUT_DATA0:.+]] = cir.extract_member %[[RESULT]][1] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR0:.+]] = cir.ptr_stride %[[ODATA_ADDR:.+]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ZERO_1:.+]] = cir.const #cir.int<0> : !u32i
+ // CIR: %[[OUT_ADDR0:.+]] = cir.ptr_stride %[[ODATA_ADDR:.+]], %[[ZERO_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA0]], %[[OUT_ADDR0]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA1:.+]] = cir.extract_member %[[RESULT]][2] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ONE_1:.+]] = cir.const #cir.int<1> : !u32i
+ // CIR: %[[OUT_ADDR1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA1]], %[[OUT_ADDR1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA2:.+]] = cir.extract_member %[[RESULT]][3] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR2:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[TWO_1:.+]] = cir.const #cir.int<2> : !u32i
+ // CIR: %[[OUT_ADDR2:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA2]], %[[OUT_ADDR2]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA3:.+]] = cir.extract_member %[[RESULT]][4] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR3:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[THREE_1:.+]] = cir.const #cir.int<3> : !u32i
+ // CIR: %[[OUT_ADDR3:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA3]], %[[OUT_ADDR3]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA4:.+]] = cir.extract_member %[[RESULT]][5] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR4:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FOUR_1:.+]] = cir.const #cir.int<4> : !u32i
+ // CIR: %[[OUT_ADDR4:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA4]], %[[OUT_ADDR4]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA5:.+]] = cir.extract_member %[[RESULT]][6] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR5:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FIVE_1:.+]] = cir.const #cir.int<5> : !u32i
+ // CIR: %[[OUT_ADDR5:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA5]], %[[OUT_ADDR5]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA6:.+]] = cir.extract_member %[[RESULT]][7] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR6:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SIX_1:.+]] = cir.const #cir.int<6> : !u32i
+ // CIR: %[[OUT_ADDR6:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA6]], %[[OUT_ADDR6]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA7:.+]] = cir.extract_member %[[RESULT]][8] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR7:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SEVEN_1:.+]] = cir.const #cir.int<7> : !u32i
+ // CIR: %[[OUT_ADDR7:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA7]], %[[OUT_ADDR7]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: } else {
// CIR: %[[NULL:.+]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR0_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ZERO_2:.+]] = cir.const #cir.int<0> : !u32i
+ // CIR: %[[OUT_ADDR0_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ZERO_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR0_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR1_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ONE_2:.+]] = cir.const #cir.int<1> : !u32i
+ // CIR: %[[OUT_ADDR1_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR1_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR2_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[TWO_2:.+]] = cir.const #cir.int<2> : !u32i
+ // CIR: %[[OUT_ADDR2_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR2_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR3_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[THREE_2:.+]] = cir.const #cir.int<3> : !u32i
+ // CIR: %[[OUT_ADDR3_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR3_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR4_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FOUR_2:.+]] = cir.const #cir.int<4> : !u32i
+ // CIR: %[[OUT_ADDR4_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR4_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR5_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FIVE_2:.+]] = cir.const #cir.int<5> : !u32i
+ // CIR: %[[OUT_ADDR5_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR5_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR6_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SIX_2:.+]] = cir.const #cir.int<6> : !u32i
+ // CIR: %[[OUT_ADDR6_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR6_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR7_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SEVEN_2:.+]] = cir.const #cir.int<7> : !u32i
+ // CIR: %[[OUT_ADDR7_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR7_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: }
// CIR: %[[FLAG1:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct1 -> !u8i
@@ -709,46 +741,62 @@ unsigned char test__mm_aesencwide128kl_u8(__m128i odata[8], const __m128i idata[
// CIR: %[[SUCC:.+]] = cir.cast int_to_bool %[[FLAG_BIT0]] : !cir.int<u, 1> -> !cir.bool
// CIR: cir.if %[[SUCC]] {
// CIR: %[[OUT_DATA0:.+]] = cir.extract_member %[[RESULT]][1] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR0:.+]] = cir.ptr_stride %[[ODATA_ADDR:.+]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ZERO_1:.+]] = cir.const #cir.int<0> : !u32i
+ // CIR: %[[OUT_ADDR0:.+]] = cir.ptr_stride %[[ODATA_ADDR:.+]], %[[ZERO_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA0]], %[[OUT_ADDR0]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA1:.+]] = cir.extract_member %[[RESULT]][2] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ONE_1:.+]] = cir.const #cir.int<1> : !u32i
+ // CIR: %[[OUT_ADDR1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA1]], %[[OUT_ADDR1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA2:.+]] = cir.extract_member %[[RESULT]][3] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR2:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[TWO_1:.+]] = cir.const #cir.int<2> : !u32i
+ // CIR: %[[OUT_ADDR2:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA2]], %[[OUT_ADDR2]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA3:.+]] = cir.extract_member %[[RESULT]][4] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR3:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[THREE_1:.+]] = cir.const #cir.int<3> : !u32i
+ // CIR: %[[OUT_ADDR3:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA3]], %[[OUT_ADDR3]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA4:.+]] = cir.extract_member %[[RESULT]][5] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR4:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FOUR_1:.+]] = cir.const #cir.int<4> : !u32i
+ // CIR: %[[OUT_ADDR4:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA4]], %[[OUT_ADDR4]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA5:.+]] = cir.extract_member %[[RESULT]][6] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR5:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FIVE_1:.+]] = cir.const #cir.int<5> : !u32i
+ // CIR: %[[OUT_ADDR5:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA5]], %[[OUT_ADDR5]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA6:.+]] = cir.extract_member %[[RESULT]][7] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR6:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SIX_1:.+]] = cir.const #cir.int<6> : !u32i
+ // CIR: %[[OUT_ADDR6:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA6]], %[[OUT_ADDR6]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA7:.+]] = cir.extract_member %[[RESULT]][8] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR7:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SEVEN_1:.+]] = cir.const #cir.int<7> : !u32i
+ // CIR: %[[OUT_ADDR7:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA7]], %[[OUT_ADDR7]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: } else {
// CIR: %[[NULL:.+]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR0_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ZERO_2:.+]] = cir.const #cir.int<0> : !u32i
+ // CIR: %[[OUT_ADDR0_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ZERO_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR0_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR1_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ONE_2:.+]] = cir.const #cir.int<1> : !u32i
+ // CIR: %[[OUT_ADDR1_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR1_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR2_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[TWO_2:.+]] = cir.const #cir.int<2> : !u32i
+ // CIR: %[[OUT_ADDR2_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR2_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR3_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[THREE_2:.+]] = cir.const #cir.int<3> : !u32i
+ // CIR: %[[OUT_ADDR3_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR3_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR4_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FOUR_2:.+]] = cir.const #cir.int<4> : !u32i
+ // CIR: %[[OUT_ADDR4_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR4_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR5_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FIVE_2:.+]] = cir.const #cir.int<5> : !u32i
+ // CIR: %[[OUT_ADDR5_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR5_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR6_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SIX_2:.+]] = cir.const #cir.int<6> : !u32i
+ // CIR: %[[OUT_ADDR6_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR6_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR7_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SEVEN_2:.+]] = cir.const #cir.int<7> : !u32i
+ // CIR: %[[OUT_ADDR7_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR7_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: }
// CIR: %[[FLAG1:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct1 -> !u8i
@@ -926,46 +974,62 @@ unsigned char test__mm_aesdecwide128kl_u8(__m128i odata[8], const __m128i idata[
// CIR: %[[SUCC:.+]] = cir.cast int_to_bool %[[FLAG_BIT0]] : !cir.int<u, 1> -> !cir.bool
// CIR: cir.if %[[SUCC]] {
// CIR: %[[OUT_DATA0:.+]] = cir.extract_member %[[RESULT]][1] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR0:.+]] = cir.ptr_stride %[[ODATA_ADDR:.+]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ZERO_1:.+]] = cir.const #cir.int<0> : !u32i
+ // CIR: %[[OUT_ADDR0:.+]] = cir.ptr_stride %[[ODATA_ADDR:.+]], %[[ZERO_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA0]], %[[OUT_ADDR0]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA1:.+]] = cir.extract_member %[[RESULT]][2] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ONE_1:.+]] = cir.const #cir.int<1> : !u32i
+ // CIR: %[[OUT_ADDR1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA1]], %[[OUT_ADDR1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA2:.+]] = cir.extract_member %[[RESULT]][3] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR2:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[TWO_1:.+]] = cir.const #cir.int<2> : !u32i
+ // CIR: %[[OUT_ADDR2:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA2]], %[[OUT_ADDR2]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA3:.+]] = cir.extract_member %[[RESULT]][4] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR3:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[THREE_1:.+]] = cir.const #cir.int<3> : !u32i
+ // CIR: %[[OUT_ADDR3:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA3]], %[[OUT_ADDR3]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA4:.+]] = cir.extract_member %[[RESULT]][5] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR4:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FOUR_1:.+]] = cir.const #cir.int<4> : !u32i
+ // CIR: %[[OUT_ADDR4:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA4]], %[[OUT_ADDR4]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA5:.+]] = cir.extract_member %[[RESULT]][6] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR5:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FIVE_1:.+]] = cir.const #cir.int<5> : !u32i
+ // CIR: %[[OUT_ADDR5:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA5]], %[[OUT_ADDR5]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA6:.+]] = cir.extract_member %[[RESULT]][7] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR6:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SIX_1:.+]] = cir.const #cir.int<6> : !u32i
+ // CIR: %[[OUT_ADDR6:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA6]], %[[OUT_ADDR6]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: %[[OUT_DATA7:.+]] = cir.extract_member %[[RESULT]][8] : !rec_anon_struct1 -> !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR7:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SEVEN_1:.+]] = cir.const #cir.int<7> : !u32i
+ // CIR: %[[OUT_ADDR7:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN_1]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[OUT_DATA7]], %[[OUT_ADDR7]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: } else {
// CIR: %[[NULL:.+]] = cir.const #cir.zero : !cir.vector<2 x !s64i>
- // CIR: %[[OUT_ADDR0_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ZERO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ZERO_2:.+]] = cir.const #cir.int<0> : !u32i
+ // CIR: %[[OUT_ADDR0_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ZERO_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR0_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR1_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[ONE_2:.+]] = cir.const #cir.int<1> : !u32i
+ // CIR: %[[OUT_ADDR1_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[ONE_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR1_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR2_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[TWO_2:.+]] = cir.const #cir.int<2> : !u32i
+ // CIR: %[[OUT_ADDR2_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[TWO_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR2_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR3_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[THREE_2:.+]] = cir.const #cir.int<3> : !u32i
+ // CIR: %[[OUT_ADDR3_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[THREE_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR3_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR4_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FOUR_2:.+]] = cir.const #cir.int<4> : !u32i
+ // CIR: %[[OUT_ADDR4_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FOUR_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR4_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR5_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[FIVE_2:.+]] = cir.const #cir.int<5> : !u32i
+ // CIR: %[[OUT_ADDR5_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[FIVE_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR5_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR6_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SIX_2:.+]] = cir.const #cir.int<6> : !u32i
+ // CIR: %[[OUT_ADDR6_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SIX_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR6_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
- // CIR: %[[OUT_ADDR7_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
+ // CIR: %[[SEVEN_2:.+]] = cir.const #cir.int<7> : !u32i
+ // CIR: %[[OUT_ADDR7_1:.+]] = cir.ptr_stride %[[ODATA_ADDR]], %[[SEVEN_2]] : (!cir.ptr<!cir.vector<2 x !s64i>>, !u32i) -> !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: cir.store align(16) %[[NULL]], %[[OUT_ADDR7_1]] : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>
// CIR: }
// CIR: %[[FLAG1:.+]] = cir.extract_member %[[RESULT]][0] : !rec_anon_struct1 -> !u8i
More information about the cfe-commits
mailing list