[clang] [llvm] [CIR][X86] Implement lowering for sqrt builtins (PR #169310)
Priyanshu Kumar via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 12:58:56 PST 2025
https://github.com/Priyanshu3820 updated https://github.com/llvm/llvm-project/pull/169310
>From 627bcb3bde64a780ed2b9aaaa9267d97c9679f9c Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Wed, 26 Nov 2025 17:45:00 +0530
Subject: [PATCH 01/32] Add CIR sqrt builtin support for X86
---
clang/include/clang/CIR/Dialect/IR/CIROps.td | 344 +++++++++++++++++-
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 66 +++-
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 323 +++++++++++++++-
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 14 +
.../CIR/CodeGen/X86/cir-sqrtps-builtins.c | 46 +++
5 files changed, 772 insertions(+), 21 deletions(-)
create mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index e612d6a0ba886..291b035e6204c 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -802,8 +802,8 @@ def CIR_ConditionOp : CIR_Op<"condition", [
//===----------------------------------------------------------------------===//
defvar CIR_YieldableScopes = [
- "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "GlobalOp", "IfOp",
- "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp", "TryOp"
+ "ArrayCtor", "ArrayDtor", "AwaitOp", "CaseOp", "DoWhileOp", "ForOp",
+ "GlobalOp", "IfOp", "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp", "TryOp"
];
def CIR_YieldOp : CIR_Op<"yield", [
@@ -1640,6 +1640,82 @@ def CIR_CmpOp : CIR_Op<"cmp", [Pure, SameTypeOperands]> {
let isLLVMLoweringRecursive = true;
}
+//===----------------------------------------------------------------------===//
+// BinOpOverflowOp
+//===----------------------------------------------------------------------===//
+
+def CIR_BinOpOverflowKind : CIR_I32EnumAttr<
+ "BinOpOverflowKind", "checked binary arithmetic operation kind", [
+ I32EnumAttrCase<"Add", 0, "add">,
+ I32EnumAttrCase<"Sub", 1, "sub">,
+ I32EnumAttrCase<"Mul", 2, "mul">
+]>;
+
+def CIR_BinOpOverflowOp : CIR_Op<"binop.overflow", [Pure, SameTypeOperands]> {
+ let summary = "Perform binary integral arithmetic with overflow checking";
+ let description = [{
+ `cir.binop.overflow` performs binary arithmetic operations with overflow
+ checking on integral operands.
+
+ The `kind` argument specifies the kind of arithmetic operation to perform.
+    It can be one of `add`, `sub`, or `mul`. The `lhs` and `rhs` arguments
+ specify the input operands of the arithmetic operation. The types of `lhs`
+ and `rhs` must be the same.
+
+ `cir.binop.overflow` produces two SSA values. `result` is the result of the
+ arithmetic operation truncated to its specified type. `overflow` is a
+ boolean value indicating whether overflow happens during the operation.
+
+    The exact semantics of this operation are as follows:
+
+ - `lhs` and `rhs` are promoted to an imaginary integral type that has
+ infinite precision.
+ - The arithmetic operation is performed on the promoted operands.
+ - The infinite-precision result is truncated to the type of `result`. The
+ truncated result is assigned to `result`.
+    - If the truncated result is equal to the un-truncated result, `overflow`
+      is set to false. Otherwise, `overflow` is set to true.
+ }];
+
+ let arguments = (ins
+ CIR_BinOpOverflowKind:$kind,
+ CIR_IntType:$lhs,
+ CIR_IntType:$rhs
+ );
+
+ let results = (outs CIR_IntType:$result, CIR_BoolType:$overflow);
+
+ let assemblyFormat = [{
+ `(` $kind `,` $lhs `,` $rhs `)` `:` qualified(type($lhs)) `,`
+ `(` qualified(type($result)) `,` qualified(type($overflow)) `)`
+ attr-dict
+ }];
+
+ let builders = [
+ OpBuilder<(ins "cir::IntType":$resultTy,
+ "cir::BinOpOverflowKind":$kind,
+ "mlir::Value":$lhs,
+ "mlir::Value":$rhs), [{
+ auto overflowTy = cir::BoolType::get($_builder.getContext());
+ build($_builder, $_state, resultTy, overflowTy, kind, lhs, rhs);
+ }]>
+ ];
+
+ let extraLLVMLoweringPatternDecl = [{
+ static std::string getLLVMIntrinName(cir::BinOpOverflowKind opKind,
+ bool isSigned, unsigned width);
+
+ struct EncompassedTypeInfo {
+ bool sign;
+ unsigned width;
+ };
+
+ static EncompassedTypeInfo computeEncompassedTypeWidth(cir::IntType operandTy,
+ cir::IntType resultTy);
+ }];
+}
+
+
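For context, a minimal C++ sketch (hypothetical, not part of this patch) of
the kind of source construct CIRGen would represent with
`cir.binop.overflow`, using the Clang checked-arithmetic builtin:

    // Returns true when the signed addition overflows; the wrapped sum is
    // written to *out. Per the lowering added later in this patch, this
    // maps to llvm.sadd.with.overflow.i32.
    bool checked_add(int a, int b, int *out) {
      return __builtin_add_overflow(a, b, out);
    }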
//===----------------------------------------------------------------------===//
// BinOp
//===----------------------------------------------------------------------===//
@@ -2533,7 +2609,9 @@ def CIR_FuncOp : CIR_Op<"func", [
OptionalAttr<DictArrayAttr>:$res_attrs,
OptionalAttr<FlatSymbolRefAttr>:$aliasee,
CIR_OptionalPriorityAttr:$global_ctor_priority,
- CIR_OptionalPriorityAttr:$global_dtor_priority);
+ CIR_OptionalPriorityAttr:$global_dtor_priority,
+ OptionalAttr<CIR_CXXSpecialMemberAttr>:$cxx_special_member
+ );
let regions = (region AnyRegion:$body);
@@ -2572,7 +2650,32 @@ def CIR_FuncOp : CIR_Op<"func", [
//===------------------------------------------------------------------===//
bool isDeclaration();
- }];
+
+ //===------------------------------------------------------------------===//
+ // C++ Special Member Functions
+ //===------------------------------------------------------------------===//
+
+ /// Returns true if this function is a C++ special member function.
+ bool isCXXSpecialMemberFunction();
+
+ bool isCxxConstructor();
+ bool isCxxDestructor();
+
+ /// Returns true if this function is a copy or move assignment operator.
+ bool isCxxSpecialAssignment();
+
+ /// Returns the kind of constructor this function represents, if any.
+ std::optional<CtorKind> getCxxConstructorKind();
+
+ /// Returns the kind of assignment operator (move, copy) this function
+ /// represents, if any.
+ std::optional<AssignKind> getCxxSpecialAssignKind();
+
+    /// Returns true if the function is a trivial C++ member function, such
+    /// as a trivial default constructor, copy/move constructor, copy/move
+    /// assignment operator, or destructor.
+    bool isCxxTrivialMemberFunction();
+  }];
let hasCustomAssemblyFormat = 1;
let hasVerifier = 1;
@@ -2752,6 +2855,100 @@ def CIR_CallOp : CIR_CallOpBase<"call", [NoRegionArguments]> {
];
}
+//===----------------------------------------------------------------------===//
+// AwaitOp
+//===----------------------------------------------------------------------===//
+
+def CIR_AwaitKind : CIR_I32EnumAttr<"AwaitKind", "await kind", [
+ I32EnumAttrCase<"Init", 0, "init">,
+ I32EnumAttrCase<"User", 1, "user">,
+ I32EnumAttrCase<"Yield", 2, "yield">,
+ I32EnumAttrCase<"Final", 3, "final">
+]>;
+
+def CIR_AwaitOp : CIR_Op<"await",[
+ DeclareOpInterfaceMethods<RegionBranchOpInterface>,
+ RecursivelySpeculatable, NoRegionArguments
+]> {
+ let summary = "Wraps C++ co_await implicit logic";
+ let description = [{
+    Under the hood, using C++ `co_await expr` roughly
+    translates to:
+
+ ```c++
+ // co_await expr;
+
+ auto &&x = CommonExpr();
+ if (!x.await_ready()) {
+ ...
+ x.await_suspend(...);
+ ...
+ }
+ x.await_resume();
+ ```
+
+ `cir.await` represents this logic by using 3 regions:
+ - ready: covers veto power from x.await_ready()
+ - suspend: wraps actual x.await_suspend() logic
+ - resume: handles x.await_resume()
+
+    Breaking this up into regions allows individual scrutiny of conditions,
+    which might lead to folding some of them out. Lowerings out of CIR,
+    e.g. to LLVM, should use the `suspend` region to track lower-level
+    codegen (e.g. intrinsic emission for coro.save/coro.suspend).
+
+ There are also 4 flavors of `cir.await` available:
+ - `init`: compiler generated initial suspend via implicit `co_await`.
+ - `user`: also known as normal, representing a user written `co_await`.
+ - `yield`: user written `co_yield` expressions.
+ - `final`: compiler generated final suspend via implicit `co_await`.
+
+ ```mlir
+ cir.scope {
+ ... // auto &&x = CommonExpr();
+ cir.await(user, ready : {
+ ... // x.await_ready()
+ }, suspend : {
+ ... // x.await_suspend()
+ }, resume : {
+ ... // x.await_resume()
+ })
+ }
+ ```
+
+    Note that resolution of the common expression is assumed to happen
+ as part of the enclosing await scope.
+ }];
+
+ let arguments = (ins CIR_AwaitKind:$kind);
+ let regions = (region SizedRegion<1>:$ready,
+ SizedRegion<1>:$suspend,
+ SizedRegion<1>:$resume);
+ let assemblyFormat = [{
+ `(` $kind `,`
+ `ready` `:` $ready `,`
+ `suspend` `:` $suspend `,`
+ `resume` `:` $resume `,`
+ `)`
+ attr-dict
+ }];
+
+ let skipDefaultBuilders = 1;
+ let builders = [
+ OpBuilder<(ins
+ "cir::AwaitKind":$kind,
+ CArg<"BuilderCallbackRef",
+ "nullptr">:$readyBuilder,
+ CArg<"BuilderCallbackRef",
+ "nullptr">:$suspendBuilder,
+ CArg<"BuilderCallbackRef",
+ "nullptr">:$resumeBuilder
+ )>
+ ];
+
+ let hasVerifier = 1;
+}
+
//===----------------------------------------------------------------------===//
// CopyOp
//===----------------------------------------------------------------------===//
@@ -2988,6 +3185,39 @@ def CIR_InlineAsmOp : CIR_Op<"asm", [RecursiveMemoryEffects]> {
let hasCustomAssemblyFormat = 1;
}
+//===----------------------------------------------------------------------===//
+// SqrtOp
+//===----------------------------------------------------------------------===//
+
+def CIR_SqrtOp : CIR_Op<"sqrt", [Pure]> {
+ let summary = "Floating-point square root";
+
+ let description = [{
+ The `cir.sqrt` operation computes the element-wise square root of its input.
+
+ The input must be either:
+ • a floating-point scalar type, or
+ • a vector whose element type is floating-point.
+
+ The result type must match the input type exactly.
+
+ Examples:
+ // scalar
+      %r = cir.sqrt %x : !cir.double
+
+      // vector
+      %v = cir.sqrt %vec : !cir.vector<!cir.float x 4>
+ }];
+
+ // input and output types: float or vector-of-float
+ let arguments = (ins CIR_AnyFloatOrVecOfFloatType:$input);
+ let results = (outs CIR_AnyFloatOrVecOfFloatType:$result);
+
+ let assemblyFormat = [{
+ $input `:` type($input) attr-dict
+ }];
+}
+
//===----------------------------------------------------------------------===//
// UnreachableOp
//===----------------------------------------------------------------------===//
@@ -4018,6 +4248,72 @@ def CIR_RotateOp : CIR_Op<"rotate", [Pure, SameOperandsAndResultType]> {
let hasFolder = 1;
}
+//===----------------------------------------------------------------------===//
+// FPClass Test Flags
+//===----------------------------------------------------------------------===//
+
+def FPClassTestEnum : CIR_I32EnumAttr<"FPClassTest", "floating-point class test flags", [
+ // Basic flags
+ I32EnumAttrCase<"SignalingNaN", 1, "fcSNan">,
+ I32EnumAttrCase<"QuietNaN", 2, "fcQNan">,
+ I32EnumAttrCase<"NegativeInfinity", 4, "fcNegInf">,
+ I32EnumAttrCase<"NegativeNormal", 8, "fcNegNormal">,
+ I32EnumAttrCase<"NegativeSubnormal", 16, "fcNegSubnormal">,
+ I32EnumAttrCase<"NegativeZero", 32, "fcNegZero">,
+ I32EnumAttrCase<"PositiveZero", 64, "fcPosZero">,
+ I32EnumAttrCase<"PositiveSubnormal", 128, "fcPosSubnormal">,
+ I32EnumAttrCase<"PositiveNormal", 256, "fcPosNormal">,
+ I32EnumAttrCase<"PositiveInfinity", 512, "fcPosInf">,
+
+ // Composite flags
+ I32EnumAttrCase<"Nan", 3, "fcNan">, // fcSNan | fcQNan
+ I32EnumAttrCase<"Infinity", 516, "fcInf">, // fcPosInf | fcNegInf
+ I32EnumAttrCase<"Normal", 264, "fcNormal">, // fcPosNormal | fcNegNormal
+ I32EnumAttrCase<"Subnormal", 144, "fcSubnormal">, // fcPosSubnormal | fcNegSubnormal
+ I32EnumAttrCase<"Zero", 96, "fcZero">, // fcPosZero | fcNegZero
+ I32EnumAttrCase<"PositiveFinite", 448, "fcPosFinite">,// fcPosNormal | fcPosSubnormal | fcPosZero
+ I32EnumAttrCase<"NegativeFinite", 56, "fcNegFinite">, // fcNegNormal | fcNegSubnormal | fcNegZero
+ I32EnumAttrCase<"Finite", 504, "fcFinite">, // fcPosFinite | fcNegFinite
+ I32EnumAttrCase<"Positive", 960, "fcPositive">, // fcPosFinite | fcPosInf
+ I32EnumAttrCase<"Negative", 60, "fcNegative">, // fcNegFinite | fcNegInf
+ I32EnumAttrCase<"All", 1023, "fcAllFlags">, // fcNan | fcInf | fcFinite
+]> {
+ let cppNamespace = "::cir";
+}
+
+def CIR_IsFPClassOp : CIR_Op<"is_fp_class"> {
+ let summary = "Corresponding to the `__builtin_fpclassify` builtin function in clang";
+
+ let description = [{
+ The `cir.is_fp_class` operation takes a floating-point value as its first
+ argument and a bitfield of flags as its second argument. The operation
+ returns a boolean value indicating whether the floating-point value
+ satisfies the given flags.
+
+    The flags must be a compile-time constant and the values are:
+
+ | Bit # | floating-point class |
+ | ----- | -------------------- |
+ | 0 | Signaling NaN |
+ | 1 | Quiet NaN |
+ | 2 | Negative infinity |
+ | 3 | Negative normal |
+ | 4 | Negative subnormal |
+ | 5 | Negative zero |
+ | 6 | Positive zero |
+ | 7 | Positive subnormal |
+ | 8 | Positive normal |
+ | 9 | Positive infinity |
+ }];
+
+ let arguments = (ins CIR_AnyFloatType:$src,
+ FPClassTestEnum:$flags);
+ let results = (outs CIR_BoolType:$result);
+ let assemblyFormat = [{
+ $src `,` $flags `:` functional-type($src, $result) attr-dict
+ }];
+}
+
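As a usage sketch (hypothetical, not part of this patch), the Clang builtin
this op models, checking for NaN with the composite fcNan mask
(fcSNan | fcQNan == 3):

    // CIRGen would emit cir.is_fp_class here; the lowering added below
    // maps it to llvm.is.fpclass with the same constant mask.
    bool is_nan(double x) {
      return __builtin_isfpclass(x, 3);
    }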
//===----------------------------------------------------------------------===//
// Assume Operations
//===----------------------------------------------------------------------===//
@@ -4202,7 +4498,7 @@ def CIR_ObjSizeOp : CIR_Op<"objsize", [Pure]> {
When the `min` attribute is present, the operation returns the minimum
guaranteed accessible size. When absent (max mode), it returns the maximum
possible object size. Corresponds to `llvm.objectsize`'s `min` argument.
-
+
The `dynamic` attribute determines if the value should be evaluated at
runtime. Corresponds to `llvm.objectsize`'s `dynamic` argument.
@@ -4658,6 +4954,44 @@ def CIR_TryOp : CIR_Op<"try",[
let hasLLVMLowering = false;
}
+//===----------------------------------------------------------------------===//
+// Exception related: EhInflightOp
+//===----------------------------------------------------------------------===//
+
+def CIR_EhInflightOp : CIR_Op<"eh.inflight_exception"> {
+ let summary = "Materialize the catch clause formal parameter";
+ let description = [{
+ `cir.eh.inflight_exception` returns two values:
+ - `exception_ptr`: The exception pointer for the inflight exception
+ - `type_id`: the type info index for the exception type
+ This operation is expected to be the first operation in the unwind
+ destination basic blocks of a `cir.try_call` operation.
+
+    The `cleanup` attribute indicates that cleanup code must be run before the
+    values produced by this operation are used to dispatch the exception, and
+    that this cleanup code must be executed even if the exception is not
+    caught. This helps CIR pass down more accurate information when lowering
+    to LLVM landingpads.
+
+ Example:
+
+ ```mlir
+ %exception_ptr, %type_id = cir.eh.inflight_exception
+ %exception_ptr, %type_id = cir.eh.inflight_exception [@_ZTIi, @_ZTIPKc]
+ %exception_ptr, %type_id = cir.eh.inflight_exception cleanup
+    ```
+ }];
+
+ let arguments = (ins UnitAttr:$cleanup,
+ OptionalAttr<FlatSymbolRefArrayAttr>:$catch_type_list);
+ let results = (outs CIR_VoidPtrType:$exception_ptr, CIR_UInt32:$type_id);
+ let assemblyFormat = [{
+ (`cleanup` $cleanup^)?
+ ($catch_type_list^)?
+ attr-dict
+ }];
+}
+
//===----------------------------------------------------------------------===//
// Atomic operations
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index ee6900141647f..e91a9e4db229a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -121,20 +121,36 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
return emitIntrinsicCallOp(*this, expr, "x86.sse.sfence", voidTy);
case X86::BI_mm_prefetch:
case X86::BI__rdtsc:
- case X86::BI__builtin_ia32_rdtscp:
+ case X86::BI__builtin_ia32_rdtscp: {
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ }
case X86::BI__builtin_ia32_lzcnt_u16:
case X86::BI__builtin_ia32_lzcnt_u32:
- case X86::BI__builtin_ia32_lzcnt_u64:
+ case X86::BI__builtin_ia32_lzcnt_u64: {
+ mlir::Value isZeroPoison = builder.getFalse(getLoc(expr->getExprLoc()));
+ return emitIntrinsicCallOp(*this, expr, "ctlz", ops[0].getType(),
+ mlir::ValueRange{ops[0], isZeroPoison});
+ }
case X86::BI__builtin_ia32_tzcnt_u16:
case X86::BI__builtin_ia32_tzcnt_u32:
- case X86::BI__builtin_ia32_tzcnt_u64:
+ case X86::BI__builtin_ia32_tzcnt_u64: {
+ mlir::Value isZeroPoison = builder.getFalse(getLoc(expr->getExprLoc()));
+ return emitIntrinsicCallOp(*this, expr, "cttz", ops[0].getType(),
+ mlir::ValueRange{ops[0], isZeroPoison});
+ }
case X86::BI__builtin_ia32_undef128:
case X86::BI__builtin_ia32_undef256:
case X86::BI__builtin_ia32_undef512:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
+ // The x86 definition of "undef" is not the same as the LLVM definition
+ // (PR32176). We leave optimizing away an unnecessary zero constant to the
+ // IR optimizer and backend.
+ // TODO: If we had a "freeze" IR instruction to generate a fixed undef
+ // value, we should use that here instead of a zero.
+ return builder.getNullValue(convertType(expr->getType()),
+ getLoc(expr->getExprLoc()));
case X86::BI__builtin_ia32_vec_ext_v4hi:
case X86::BI__builtin_ia32_vec_ext_v16qi:
case X86::BI__builtin_ia32_vec_ext_v8hi:
@@ -169,10 +185,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_vec_set_v16hi:
case X86::BI__builtin_ia32_vec_set_v8si:
case X86::BI__builtin_ia32_vec_set_v4di:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
case X86::BI_mm_setcsr:
- case X86::BI__builtin_ia32_ldmxcsr:
+ case X86::BI__builtin_ia32_ldmxcsr: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ Address tmp = createMemTemp(expr->getArg(0)->getType(), loc);
+ builder.createStore(loc, ops[0], tmp);
+ return emitIntrinsicCallOp(*this, expr, "x86.sse.ldmxcsr",
+ builder.getVoidTy(), tmp.getPointer());
+ }
case X86::BI_mm_getcsr:
- case X86::BI__builtin_ia32_stmxcsr:
+ case X86::BI__builtin_ia32_stmxcsr: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ Address tmp = createMemTemp(expr->getType(), loc);
+ emitIntrinsicCallOp(*this, expr, "x86.sse.stmxcsr", builder.getVoidTy(),
+ tmp.getPointer());
+ return builder.createLoad(loc, tmp);
+ }
case X86::BI__builtin_ia32_xsave:
case X86::BI__builtin_ia32_xsave64:
case X86::BI__builtin_ia32_xrstor:
@@ -681,10 +713,24 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtsh_round_mask:
case X86::BI__builtin_ia32_sqrtsd_round_mask:
case X86::BI__builtin_ia32_sqrtss_round_mask:
+ errorNYI("masked round sqrt builtins");
+ return {};
case X86::BI__builtin_ia32_sqrtpd256:
case X86::BI__builtin_ia32_sqrtpd:
case X86::BI__builtin_ia32_sqrtps256:
- case X86::BI__builtin_ia32_sqrtps:
+ case X86::BI__builtin_ia32_sqrtps: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    assert(expr->getNumArgs() == 1 && "__builtin_ia32_sqrtps takes one argument");
+    mlir::Value arg = emitScalarExpr(expr->getArg(0));
+ mlir::Type argTy = arg.getType();
+ if (auto vecTy = argTy.dyn_cast<mlir::VectorType>()) {
+ assert(vecTy.getNumElements() == 4 &&
+ vecTy.getElementType().isa<mlir::FloatType>() &&
+ "__builtin_ia32_sqrtps expects <4 x float> / __m128");
+ }
+    auto sqrt = cir::SqrtOp::create(builder, loc, argTy, arg);
+ return sqrt.getResult();
+ }
case X86::BI__builtin_ia32_sqrtph256:
case X86::BI__builtin_ia32_sqrtph:
case X86::BI__builtin_ia32_sqrtph512:
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index d43a462a25092..937c66082ca40 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -30,6 +30,7 @@
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "mlir/Transforms/DialectConversion.h"
+#include "clang/Basic/LLVM.h"
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
@@ -44,6 +45,96 @@
using namespace cir;
using namespace llvm;
+using namespace mlir;
+
+static std::string getLLVMIntrinsicNameForType(Type llvmTy) {
+ std::string s;
+ {
+ llvm::raw_string_ostream os(s);
+ llvm::Type *unused = nullptr;
+ os << llvmTy;
+ }
+ if (auto vecTy = llvmTy.dyn_cast<LLVM::LLVMType>()) {
+ }
+ return s;
+}
+
+// Lower cir.sqrt to a call to the corresponding llvm.sqrt.* intrinsic.
+LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
+ cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
+ ConversionPatternRewriter &rewriter) const {
+
+ Location loc = op.getLoc();
+ MLIRContext *ctx = rewriter.getContext();
+
+ Type cirResTy = op.getResult().getType();
+ Type llvmResTy = getTypeConverter()->convertType(cirResTy);
+ if (!llvmResTy)
+ return op.emitOpError(
+ "expected LLVM dialect result type for cir.sqrt lowering");
+
+ Value operand = adaptor.getInput();
+ Value llvmOperand = operand;
+ if (operand.getType() != llvmResTy) {
+ llvmOperand = rewriter.create<LLVM::BitcastOp>(loc, llvmResTy, operand);
+ }
+
+ // Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result
+ std::string intrinsicName = "llvm.sqrt.";
+ std::string suffix;
+
+ // If the CIR result type is a vector, include the 'vN' part in the suffix.
+ if (auto vec = cirResTy.dyn_cast<cir::VectorType>()) {
+ Type elt = vec.getElementType();
+ if (auto f = elt.dyn_cast<cir::FloatType>()) {
+ unsigned width = f.getWidth();
+ unsigned n = vec.getNumElements();
+ if (width == 32)
+ suffix = "v" + std::to_string(n) + "f32";
+ else if (width == 64)
+ suffix = "v" + std::to_string(n) + "f64";
+ else if (width == 16)
+ suffix = "v" + std::to_string(n) + "f16";
+ else
+ return op.emitOpError("unsupported float width for sqrt");
+ } else {
+ return op.emitOpError("vector element must be floating point for sqrt");
+ }
+ } else if (auto f = cirResTy.dyn_cast<cir::FloatType>()) {
+ // Scalar float
+ unsigned width = f.getWidth();
+ if (width == 32)
+ suffix = "f32";
+ else if (width == 64)
+ suffix = "f64";
+ else if (width == 16)
+ suffix = "f16";
+ else
+ return op.emitOpError("unsupported float width for sqrt");
+ } else {
+ return op.emitOpError("unsupported type for cir.sqrt lowering");
+ }
+
+ intrinsicName += suffix;
+
+ // Ensure the llvm intrinsic function exists at module scope. Insert it at
+ // the start of the module body using an insertion guard.
+ ModuleOp module = op->getParentOfType<ModuleOp>();
+ if (!module.lookupSymbol<LLVM::LLVMFuncOp>(intrinsicName)) {
+ OpBuilder::InsertionGuard guard(rewriter);
+ rewriter.setInsertionPointToStart(module.getBody());
+    auto llvmFnType = LLVM::LLVMFunctionType::get(llvmResTy, {llvmResTy},
+                                                  /*isVarArg=*/false);
+ rewriter.create<LLVM::LLVMFuncOp>(loc, intrinsicName, llvmFnType);
+ }
+
+ // Create the call and replace cir.sqrt
+ auto callee = SymbolRefAttr::get(ctx, intrinsicName);
+ rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, llvmResTy, callee,
+ ArrayRef<Value>{llvmOperand});
+
+ return mlir::success();
+}
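For example, under this naming scheme a `!cir.vector<!cir.float x 4>` result
selects `llvm.sqrt.v4f32`, while a scalar `!cir.double` result selects
`llvm.sqrt.f64`.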
namespace cir {
namespace direct {
@@ -284,7 +375,10 @@ void convertSideEffectForCall(mlir::Operation *callOp, bool isNothrow,
memoryEffect = mlir::LLVM::MemoryEffectsAttr::get(
callOp->getContext(), /*other=*/ModRefInfo::Ref,
/*argMem=*/ModRefInfo::Ref,
- /*inaccessibleMem=*/ModRefInfo::Ref);
+ /*inaccessibleMem=*/ModRefInfo::Ref,
+ /*errnoMem=*/ModRefInfo::Ref,
+ /*targetMem0=*/ModRefInfo::Ref,
+ /*targetMem1=*/ModRefInfo::Ref);
noUnwind = true;
willReturn = true;
break;
@@ -293,7 +387,10 @@ void convertSideEffectForCall(mlir::Operation *callOp, bool isNothrow,
memoryEffect = mlir::LLVM::MemoryEffectsAttr::get(
callOp->getContext(), /*other=*/ModRefInfo::NoModRef,
/*argMem=*/ModRefInfo::NoModRef,
- /*inaccessibleMem=*/ModRefInfo::NoModRef);
+ /*inaccessibleMem=*/ModRefInfo::NoModRef,
+ /*errnoMem=*/ModRefInfo::NoModRef,
+ /*targetMem0=*/ModRefInfo::NoModRef,
+ /*targetMem1=*/ModRefInfo::NoModRef);
noUnwind = true;
willReturn = true;
break;
@@ -670,6 +767,18 @@ mlir::LogicalResult CIRToLLVMASinOpLowering::matchAndRewrite(
return mlir::success();
}
+mlir::LogicalResult CIRToLLVMIsFPClassOpLowering::matchAndRewrite(
+ cir::IsFPClassOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ mlir::Value src = adaptor.getSrc();
+ cir::FPClassTest flags = adaptor.getFlags();
+ mlir::IntegerType retTy = rewriter.getI1Type();
+
+ rewriter.replaceOpWithNewOp<mlir::LLVM::IsFPClass>(
+ op, retTy, src, static_cast<uint32_t>(flags));
+ return mlir::success();
+}
+
mlir::LogicalResult CIRToLLVMAssumeOpLowering::matchAndRewrite(
cir::AssumeOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
@@ -1995,7 +2104,6 @@ void CIRToLLVMGlobalOpLowering::setupRegionInitializedLLVMGlobalOp(
// attributes are available on cir.global ops. This duplicates code
// in CIRToLLVMGlobalOpLowering::matchAndRewrite() but that will go
// away when the placeholders are no longer needed.
- assert(!cir::MissingFeatures::opGlobalConstant());
const bool isConst = op.getConstant();
assert(!cir::MissingFeatures::addressSpace());
const unsigned addrSpace = 0;
@@ -2055,8 +2163,7 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite(
convertTypeForMemory(*getTypeConverter(), dataLayout, cirSymType);
   // FIXME: These default values are placeholders until the equivalent
// attributes are available on cir.global ops.
- assert(!cir::MissingFeatures::opGlobalConstant());
- const bool isConst = false;
+ const bool isConst = op.getConstant();
assert(!cir::MissingFeatures::addressSpace());
const unsigned addrSpace = 0;
const bool isDsoLocal = op.getDsoLocal();
@@ -2570,6 +2677,120 @@ mlir::LogicalResult CIRToLLVMCmpOpLowering::matchAndRewrite(
return cmpOp.emitError() << "unsupported type for CmpOp: " << type;
}
+mlir::LogicalResult CIRToLLVMBinOpOverflowOpLowering::matchAndRewrite(
+ cir::BinOpOverflowOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ mlir::Location loc = op.getLoc();
+ cir::BinOpOverflowKind arithKind = op.getKind();
+ cir::IntType operandTy = op.getLhs().getType();
+ cir::IntType resultTy = op.getResult().getType();
+
+ EncompassedTypeInfo encompassedTyInfo =
+ computeEncompassedTypeWidth(operandTy, resultTy);
+ mlir::IntegerType encompassedLLVMTy =
+ rewriter.getIntegerType(encompassedTyInfo.width);
+
+ mlir::Value lhs = adaptor.getLhs();
+ mlir::Value rhs = adaptor.getRhs();
+ if (operandTy.getWidth() < encompassedTyInfo.width) {
+ if (operandTy.isSigned()) {
+ lhs = mlir::LLVM::SExtOp::create(rewriter, loc, encompassedLLVMTy, lhs);
+ rhs = mlir::LLVM::SExtOp::create(rewriter, loc, encompassedLLVMTy, rhs);
+ } else {
+ lhs = mlir::LLVM::ZExtOp::create(rewriter, loc, encompassedLLVMTy, lhs);
+ rhs = mlir::LLVM::ZExtOp::create(rewriter, loc, encompassedLLVMTy, rhs);
+ }
+ }
+
+ std::string intrinName = getLLVMIntrinName(arithKind, encompassedTyInfo.sign,
+ encompassedTyInfo.width);
+ auto intrinNameAttr = mlir::StringAttr::get(op.getContext(), intrinName);
+
+ mlir::IntegerType overflowLLVMTy = rewriter.getI1Type();
+ auto intrinRetTy = mlir::LLVM::LLVMStructType::getLiteral(
+ rewriter.getContext(), {encompassedLLVMTy, overflowLLVMTy});
+
+ auto callLLVMIntrinOp = mlir::LLVM::CallIntrinsicOp::create(
+ rewriter, loc, intrinRetTy, intrinNameAttr, mlir::ValueRange{lhs, rhs});
+ mlir::Value intrinRet = callLLVMIntrinOp.getResult(0);
+
+ mlir::Value result = mlir::LLVM::ExtractValueOp::create(
+ rewriter, loc, intrinRet, ArrayRef<int64_t>{0})
+ .getResult();
+ mlir::Value overflow = mlir::LLVM::ExtractValueOp::create(
+ rewriter, loc, intrinRet, ArrayRef<int64_t>{1})
+ .getResult();
+
+ if (resultTy.getWidth() < encompassedTyInfo.width) {
+ mlir::Type resultLLVMTy = getTypeConverter()->convertType(resultTy);
+ auto truncResult =
+ mlir::LLVM::TruncOp::create(rewriter, loc, resultLLVMTy, result);
+
+ // Extend the truncated result back to the encompassing type to check for
+ // any overflows during the truncation.
+ mlir::Value truncResultExt;
+ if (resultTy.isSigned())
+ truncResultExt = mlir::LLVM::SExtOp::create(
+ rewriter, loc, encompassedLLVMTy, truncResult);
+ else
+ truncResultExt = mlir::LLVM::ZExtOp::create(
+ rewriter, loc, encompassedLLVMTy, truncResult);
+ auto truncOverflow = mlir::LLVM::ICmpOp::create(
+ rewriter, loc, mlir::LLVM::ICmpPredicate::ne, truncResultExt, result);
+
+ result = truncResult;
+ overflow = mlir::LLVM::OrOp::create(rewriter, loc, overflow, truncOverflow);
+ }
+
+ mlir::Type boolLLVMTy =
+ getTypeConverter()->convertType(op.getOverflow().getType());
+ if (boolLLVMTy != rewriter.getI1Type())
+ overflow = mlir::LLVM::ZExtOp::create(rewriter, loc, boolLLVMTy, overflow);
+
+ rewriter.replaceOp(op, mlir::ValueRange{result, overflow});
+
+ return mlir::success();
+}
+
+std::string CIRToLLVMBinOpOverflowOpLowering::getLLVMIntrinName(
+ cir::BinOpOverflowKind opKind, bool isSigned, unsigned width) {
+ // The intrinsic name is `@llvm.{s|u}{opKind}.with.overflow.i{width}`
+
+ std::string name = "llvm.";
+
+ if (isSigned)
+ name.push_back('s');
+ else
+ name.push_back('u');
+
+ switch (opKind) {
+ case cir::BinOpOverflowKind::Add:
+ name.append("add.");
+ break;
+ case cir::BinOpOverflowKind::Sub:
+ name.append("sub.");
+ break;
+ case cir::BinOpOverflowKind::Mul:
+ name.append("mul.");
+ break;
+ }
+
+ name.append("with.overflow.i");
+ name.append(std::to_string(width));
+
+ return name;
+}
+
+CIRToLLVMBinOpOverflowOpLowering::EncompassedTypeInfo
+CIRToLLVMBinOpOverflowOpLowering::computeEncompassedTypeWidth(
+ cir::IntType operandTy, cir::IntType resultTy) {
+ bool sign = operandTy.getIsSigned() || resultTy.getIsSigned();
+ unsigned width =
+ std::max(operandTy.getWidth() + (sign && operandTy.isUnsigned()),
+ resultTy.getWidth() + (sign && resultTy.isUnsigned()));
+ return {sign, width};
+}
+
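Worked example: for an unsigned 32-bit operand type and a signed 32-bit
result type, `sign` is true and only the unsigned side gains an extra bit, so
the encompassing width is max(32 + 1, 32 + 0) = 33 and an `add` is lowered to
`llvm.sadd.with.overflow.i33`.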
mlir::LogicalResult CIRToLLVMShiftOpLowering::matchAndRewrite(
cir::ShiftOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
@@ -3100,6 +3321,90 @@ mlir::LogicalResult CIRToLLVMAllocExceptionOpLowering::matchAndRewrite(
return mlir::success();
}
+static mlir::LLVM::LLVMStructType
+getLLVMLandingPadStructTy(mlir::ConversionPatternRewriter &rewriter) {
+ // Create the landing pad type: struct { ptr, i32 }
+ mlir::MLIRContext *ctx = rewriter.getContext();
+ auto llvmPtr = mlir::LLVM::LLVMPointerType::get(ctx);
+ llvm::SmallVector<mlir::Type> structFields = {llvmPtr, rewriter.getI32Type()};
+ return mlir::LLVM::LLVMStructType::getLiteral(ctx, structFields);
+}
+
+mlir::LogicalResult CIRToLLVMEhInflightOpLowering::matchAndRewrite(
+ cir::EhInflightOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ auto llvmFn = op->getParentOfType<mlir::LLVM::LLVMFuncOp>();
+ assert(llvmFn && "expected LLVM function parent");
+ mlir::Block *entryBlock = &llvmFn.getRegion().front();
+ assert(entryBlock->isEntryBlock());
+
+ mlir::ArrayAttr catchListAttr = op.getCatchTypeListAttr();
+ mlir::SmallVector<mlir::Value> catchSymAddrs;
+
+ auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext());
+ mlir::Location loc = op.getLoc();
+
+ // %landingpad = landingpad { ptr, i32 }
+ // Note that since llvm.landingpad has to be the first operation on the
+ // block, any needed value for its operands has to be added somewhere else.
+ if (catchListAttr) {
+ // catch ptr @_ZTIi
+ // catch ptr @_ZTIPKc
+ for (mlir::Attribute catchAttr : catchListAttr) {
+ auto symAttr = cast<mlir::FlatSymbolRefAttr>(catchAttr);
+ // Generate `llvm.mlir.addressof` for each symbol, and place those
+ // operations in the LLVM function entry basic block.
+ mlir::OpBuilder::InsertionGuard guard(rewriter);
+ rewriter.setInsertionPointToStart(entryBlock);
+ mlir::Value addrOp = mlir::LLVM::AddressOfOp::create(
+ rewriter, loc, llvmPtrTy, symAttr.getValue());
+ catchSymAddrs.push_back(addrOp);
+ }
+ } else if (!op.getCleanup()) {
+    // We need to emit a catch-all clause only if cleanup is not set: when we
+    // have a catch-all handler, there is no case where we would unwind past
+    // the handler.
+ mlir::OpBuilder::InsertionGuard guard(rewriter);
+ rewriter.setInsertionPointToStart(entryBlock);
+ mlir::Value nullOp = mlir::LLVM::ZeroOp::create(rewriter, loc, llvmPtrTy);
+ catchSymAddrs.push_back(nullOp);
+ }
+
+ // %slot = extractvalue { ptr, i32 } %x, 0
+ // %selector = extractvalue { ptr, i32 } %x, 1
+ mlir::LLVM::LLVMStructType llvmLandingPadStructTy =
+ getLLVMLandingPadStructTy(rewriter);
+ auto landingPadOp = mlir::LLVM::LandingpadOp::create(
+ rewriter, loc, llvmLandingPadStructTy, catchSymAddrs);
+
+ if (op.getCleanup())
+ landingPadOp.setCleanup(true);
+
+ mlir::Value slot =
+ mlir::LLVM::ExtractValueOp::create(rewriter, loc, landingPadOp, 0);
+ mlir::Value selector =
+ mlir::LLVM::ExtractValueOp::create(rewriter, loc, landingPadOp, 1);
+ rewriter.replaceOp(op, mlir::ValueRange{slot, selector});
+
+  // Landing pads are required to be in LLVM functions with a personality
+  // attribute.
+ // TODO(cir): for now hardcode personality creation in order to start
+ // adding exception tests, once we annotate CIR with such information,
+ // change it to be in FuncOp lowering instead.
+ mlir::OpBuilder::InsertionGuard guard(rewriter);
+ // Insert personality decl before the current function.
+ rewriter.setInsertionPoint(llvmFn);
+ auto personalityFnTy =
+ mlir::LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {},
+ /*isVarArg=*/true);
+
+ const StringRef fnName = "__gxx_personality_v0";
+ createLLVMFuncOpIfNotExist(rewriter, op, fnName, personalityFnTy);
+ llvmFn.setPersonality(fnName);
+
+ return mlir::success();
+}
+
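For instance, with `catch_type_list = [@_ZTIi]` the rewritten block would
begin with roughly `%lp = landingpad { ptr, i32 } catch ptr @_ZTIi`, followed
by the two extractvalue projections that replace `exception_ptr` and
`type_id`.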
mlir::LogicalResult CIRToLLVMTrapOpLowering::matchAndRewrite(
cir::TrapOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
@@ -3843,6 +4148,12 @@ mlir::LogicalResult CIRToLLVMBlockAddressOpLowering::matchAndRewrite(
return mlir::failure();
}
+mlir::LogicalResult CIRToLLVMAwaitOpLowering::matchAndRewrite(
+    cir::AwaitOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  // Lowering of cir.await to LLVM is not implemented yet.
+  return mlir::failure();
+}
+
std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass() {
return std::make_unique<ConvertCIRToLLVMPass>();
}
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 0591de545b81d..be6a380372efe 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -12,11 +12,25 @@
#ifndef CLANG_CIR_LOWERTOLLVM_H
#define CLANG_CIR_LOWERTOLLVM_H
+#include "mlir/Conversion/PatternRewriter.h"
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Transforms/DialectConversion.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
+namespace cir {
+class SqrtOp;
+}
+
+class CIRToLLVMSqrtOpLowering : public mlir::OpConversionPattern<cir::SqrtOp> {
+public:
+ using mlir::OpConversionPattern<cir::SqrtOp>::OpConversionPattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const override;
+};
+
namespace cir {
namespace direct {
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
new file mode 100644
index 0000000000000..6e1dace82928c
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
@@ -0,0 +1,46 @@
+// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtss, sqrtsd, etc.)
+// RUN: %clang_cc1 -fclangir -triple x86_64-unknown-linux-gnu -O0 %s -emit-cir -o - | FileCheck %s
+
+#include <immintrin.h>
+
+// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
+__m128 test_sqrtps(__m128 x) {
+ return __builtin_ia32_sqrtps(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
+__m256 test_sqrtps256(__m256 x) {
+ return __builtin_ia32_sqrtps256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
+__m512 test_sqrtps512(__m512 x) {
+ return __builtin_ia32_sqrtps512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps512
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
+__m128d test_sqrtpd(__m128d x) {
+ return __builtin_ia32_sqrtpd(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
+__m256d test_sqrtpd256(__m256d x) {
+ return __builtin_ia32_sqrtpd256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
+__m512d test_sqrtpd512(__m512d x) {
+ return __builtin_ia32_sqrtpd512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd512
+// CHECK: cir.sqrt
>From 4a39fd7185cd294b96a4faadc2fa21f2a4d53b6b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Sat, 29 Nov 2025 09:59:40 +0530
Subject: [PATCH 02/32] Implement sqrt builtins for all vector sizes
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 20 ++----
.../test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 67 +++++++++++++++++++
2 files changed, 73 insertions(+), 14 deletions(-)
create mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 45c0de322925a..f8a139ec7a8e0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -786,24 +786,16 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtpd256:
case X86::BI__builtin_ia32_sqrtpd:
case X86::BI__builtin_ia32_sqrtps256:
- case X86::BI__builtin_ia32_sqrtps: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- assert(expr->getNumArgs() == 1 && "__builtin_ia32_sqrtps takes one argument");
- mlir::Value arg = emitScalarExpr(expr->getArg(0));
- mlir::Type argTy = arg.getType();
- if (auto vecTy = argTy.dyn_cast<mlir::VectorType>()) {
- assert(vecTy.getNumElements() == 4 &&
- vecTy.getElementType().isa<mlir::FloatType>() &&
- "__builtin_ia32_sqrtps expects <4 x float> / __m128");
- }
- auto sqrt = cir::SqrtOp::create(builder, loc, argTy, arg);
- return sqrt.getResult();
- }
+ case X86::BI__builtin_ia32_sqrtps:
case X86::BI__builtin_ia32_sqrtph256:
case X86::BI__builtin_ia32_sqrtph:
case X86::BI__builtin_ia32_sqrtph512:
case X86::BI__builtin_ia32_sqrtps512:
- case X86::BI__builtin_ia32_sqrtpd512:
+ case X86::BI__builtin_ia32_sqrtpd512: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ mlir::Value arg = ops[0];
+ return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
+ }
case X86::BI__builtin_ia32_pmuludq128:
case X86::BI__builtin_ia32_pmuludq256:
case X86::BI__builtin_ia32_pmuludq512:
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
new file mode 100644
index 0000000000000..ef5cb954e3efe
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -0,0 +1,67 @@
+// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtph, etc.)
+// RUN: %clang_cc1 -fclangir -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -emit-cir %s -o - | FileCheck %s
+
+#include <immintrin.h>
+
+// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
+__m128 test_sqrtps(__m128 x) {
+ return __builtin_ia32_sqrtps(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
+__m256 test_sqrtps256(__m256 x) {
+ return __builtin_ia32_sqrtps256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
+__m512 test_sqrtps512(__m512 x) {
+ return __builtin_ia32_sqrtps512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps512
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
+__m128d test_sqrtpd(__m128d x) {
+ return __builtin_ia32_sqrtpd(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
+__m256d test_sqrtpd256(__m256d x) {
+ return __builtin_ia32_sqrtpd256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
+__m512d test_sqrtpd512(__m512d x) {
+ return __builtin_ia32_sqrtpd512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd512
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtph - half precision vector sqrt (128-bit)
+__m128h test_sqrtph(__m128h x) {
+ return __builtin_ia32_sqrtph(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtph
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtph256 - half precision vector sqrt (256-bit)
+__m256h test_sqrtph256(__m256h x) {
+ return __builtin_ia32_sqrtph256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtph256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtph512 - half precision vector sqrt (512-bit)
+__m512h test_sqrtph512(__m512h x) {
+ return __builtin_ia32_sqrtph512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtph512
+// CHECK: cir.sqrt
\ No newline at end of file
>From ef3fd9711494e864190932566bcfe46231b95c51 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Sun, 30 Nov 2025 11:44:23 +0530
Subject: [PATCH 03/32] Test file renamed
---
.../CIR/CodeGen/X86/cir-sqrtps-builtins.c | 46 -------------------
1 file changed, 46 deletions(-)
delete mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
deleted file mode 100644
index 6e1dace82928c..0000000000000
--- a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtss, sqrtsd, etc.)
-// RUN: %clang_cc1 -fclangir -triple x86_64-unknown-linux-gnu -O0 %s -emit-cir -o - | FileCheck %s
-
-#include <immintrin.h>
-
-// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
-__m128 test_sqrtps(__m128 x) {
- return __builtin_ia32_sqrtps(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
-__m256 test_sqrtps256(__m256 x) {
- return __builtin_ia32_sqrtps256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
-__m512 test_sqrtps512(__m512 x) {
- return __builtin_ia32_sqrtps512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps512
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
-__m128d test_sqrtpd(__m128d x) {
- return __builtin_ia32_sqrtpd(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
-__m256d test_sqrtpd256(__m256d x) {
- return __builtin_ia32_sqrtpd256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
-__m512d test_sqrtpd512(__m512d x) {
- return __builtin_ia32_sqrtpd512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd512
-// CHECK: cir.sqrt
>From 97056731fce0a5e5c2185e16986e0189cec95c7b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Tue, 2 Dec 2025 21:29:57 +0530
Subject: [PATCH 04/32] Add sqrt changes patch
---
my-sqrt-changes.patch | Bin 0 -> 12058 bytes
1 file changed, 0 insertions(+), 0 deletions(-)
create mode 100644 my-sqrt-changes.patch
diff --git a/my-sqrt-changes.patch b/my-sqrt-changes.patch
new file mode 100644
index 0000000000000000000000000000000000000000..87c0ca69ac8abe6aaa684ffbbce3c65e342f6066
GIT binary patch
literal 12058
zcmdU#Z%-V_5yo4}yGZ#CbNCYBz=91N*n~S7WA4rpIa`=G7o8$*fn7{u*uCAw4m$Z$
z<U{2H<$aRp*R at 6WjAxexmdK}(%}h^sRdsd!d8%gr`=6t5656_o(9m_Pr-AO9VKE$r
zxB7dgu|<uZ#pm;I6k3&cTj6=w)z~x5YHCeOGY5LMQG5F$bVENZgg2snP+31c+P?|E
zj`AVHK>v=!tu3i|F2ZT(#J5Fw7#`?a4J*1>g9PmBADXqIw at 2E?gZTD9vmS+IJum2D
zRow**wAax3yVD$8=HgXIgUI_%f6v0X_V+gK<4JfT2^!kLd{_!=x}TH0^ZHll&67B%
zp*4%)%XlI0$LEtq3vtQMw2%h7;@i_M2YS;H_kQGPyPx#`-$zYt4foOrJex$wX4)*@
z72&0L_M&ZeqO}fWOPuPCuGis at x_hbjo$xn}Vt?}*IUk;DPxn55Q#)vChOa)+eJkz*
zYVV{s&bbKhwLjM69JXa4%<k%q?|vbZw+OHF7CLV<V-RhAJ?fhidM689kc7SHQ9JUF
zt*{-w)0%CK4 at 4W!YsVRd=9eYIM(y2cru{1Sm-52zrOlnv>V|k>FK6O_x1Z>~7uLls
z*GA)V?|;_(z3^?e_l8zf_g;5SZda39!rjli%C7cwRwvPlQ=~vw7X5m<!@}Kq>g}Ji
zNpsAjs=kw-RMOn1uP>AJUbGvFlH`glS=gp*_nWXkj3E26yyqI*ku`70diM3Sd-?W-
zyz`Zwz8!kYy=aeU{7f3YJ-sAlo~Md1BcH>Qdh$ZcM^6<M$OxQqZ<v#2E~P>qPTum|
zxb=9_Sx9;9<}~)gMU1MJAFo3P?$5Lj(<AZ9q7r#_QaZ#0G|`Sx5o<?N#2Q9jHaIa_
zo}2R-eF8a-m;@_qipyZ4v$-y1-%VN5(~~?|!$P!<!^=}}9>icnT;%Y at TU-AB7rhN6
z``zI;sG}$R3hxAsHHBp!%Oh80kB`F_dIGDql`j{Rtsd+7K-qd(Z(2fXOS)gtsw%#-
zEOU3*%q4epc2@}3`8dv7)=nR3 at AKL_ST8*<OE-r3$U{U3M|#BNvltVJyqog((-_5y
zhzHC+_v6ThthX88&qOr!gW@@P at jUwGiSSHIxT_U$bx$#DJD!Z8uYUMz_)0u~jwf2^
z4nzYg2OB|kOLY2r?r1$y^dqGM&AJF*M+{^*rw}saME@|P^kMiHOPPy4^r`1ilM-zB
zlQcJoy8R_dZ?aacDBn%J?evHHb?bQ<EvHqojJc9!^flvD7fAm?Z#elGS;Je|gH;?<
zqYdSvPK***6I{;Y3UCA!Ku0>B8QRNSw3ad#cG8y}_e3KhdE at rO&|+-`Rtr9a@{d{p
zj#!8sImaJ0+7SivWU7Y>&BLa<l8S7ZqsLbCfm4l=NgJ`o!0t!w0{VC^nH at SrMx$$J
zGT&V;Ynn&ZYko%DhEpq2V^357cDAhb>z}{+SNOM{n)=tiLi2~3MUTMhD?>5VUDP<^
zDHA)W(wIrD^+tE5iL#DB)kc at yql0lWjn|4{d8%dXCeckAvWP=%n9zK#%e02<L=`y@
zwF}vV?Y6vUs>Nos1R`I*R@;UFutwKpnFgow+o&Uap=yD=BTG`reRlb_EgPY?Q704h
zjAwoVbv)l`<|v-Fw#qlBxKWX3S-H80vaTpEBz_+h4J at +fwrmpSnx_v1_UJ^fG9O51
zh;DPsFbkxbPgd5 at +O~Wy*-w(fGZU74EDr9W*c&>Sjh at Go=f_RSL!R=;^g=6NM{X~~
zA&ooYTo0dT-h2OY&zy&I&+QXlT*lpYeOvxH{Ukx17Ou+1A4kc2cf`u|7#-bTW-GfV
zx){c9=v3)*3`W?5WqY#?v)oTT+q-}LNfv^}t=H64^yQqX5YYH9V=t3c7u44vUa#%8
z<Z0yCZ}ji%a@?N0`nKx%d!g4=MbATNr7N9Oar3k&qj91MQV}nyDGp*au_SM$3%nX5
zCRIf%RuxVC-_rPz<}JlaWAdtEno^IdD5lB7tBEz~Wm!o7p)RV4^t`NRo8{e<b$R&f
zn6u8MCy$GE#$PLf(naxZ1aqkpKrwdWWh)?0cI7Lxaa~{Yj<w#p_XirITVwX}T-}cO
zq5lva$(n{5j4qs>s=nFRRbII-DoyQfNpBpN_Lb-N#h2)cZR|@%=>1((%txoe1G;z3
zqd%5M!m?mKWa+7M_ZR7DRyf)BZ2C@;8YZ=lds&x#txEaB@)XFM>W<owUg*!^!)3*G
zdo6Z#I!p4`)hV6~G^cmYeBGM9gobOZhUTk`T}^2j<P37?$OjB197p}{#F1Im;tk;}
z8*~5iclT>fFZXMkmsd2m<>Qi*_YGTMS7j-`)2`?QfWZ2CcR%h6{6jSQoi1v@$vm6W
z039~v%T|H$rHVLJMR>`QlZ?48@)&1pP1_#NUd0$>HIVDMyf53)tU6Ok at AbSKE4q|C
z8#m*;Fg#D6hmH(8rwG0zADJJ1`CF4kJ{mrKiqSi1A-^-s&K`Zg+YvcACeL?Im$k1m
z-H_#w5vgd`YtZ*sLwmxCQe at A$ru*yJ7<hsX0ND<k+Yn~3Iw#c`^ug%#V!NlZW=^Hf
z?hNbJ`MGB-vj&gGpc(oRbTYu1bQ?DG<a&6O^_$Ruc`^05*%O*fPi{*oTckKs=3mdD
zi2XkFpUX3QnTJhU_)R89I+1c_aar6tcC~BzMqq^G5w_ZUg*@F3D|Cew^7wu;Wc)0m
z5b6Nz#lJ`yZZ5x at 921w<O+vGZhL;sHs8X<@mUj72_YakALBp#$zqamIgq;p`w<cV?
zqOqH!;hV`jDGt6Fb&sEXPgs#p;GrO3f2O-`bLd!GW<1nb9(Bl?{CdVZIk{D0d-U55
zm5ms8ltd5VNYSc{mS;83Jvg6D<w#$nEBR6k>t*!H^57GPy6_Imr*$3l#x?ffQsr?4
ze<xzoOKmHjSl-SNONt^?V8ompuRPb7b#qwhKAp#SliH$UqGcVCDz#keSJ6X<Mit4u
z$72*fi+z{G<9^h5ip*uLI~L%CwxtO}*kJ3r4i;-YUgaIEyi=R{kSR|zG8wEtwmCds
zdf1@#)I;=oY*F85_?^kF-lu$oRL2?_=q$)xd<`9~jqByu at tVs0Wt4qhwNHzd<=!5C
z!5OAK*6%6VPeK`Hjc%I@^2iJ6XFqBaZ_hKTb@;4n?KW!I0<@Z2JsHRCD)Dmro))61
zY8$R)+nh|}taJ2{ETK$kl$EFXP~n=1GVoP~h=F~9IP%jm3#@*}$?ey^3z+6i-yipH
z>!^(SdXW23d8f!<j-2BoxktfW?aU)No<A-UnqJIn^L1#3T~jgJ*BSaCXw1G4*%u#+
z4Aukb3g=MA97XH0-)Q(;A^`b;_24Y4{_u0MkL?plOpf>B)YR;5j8epdb|n*WYGf#4
z3t7*40d4)|`<By1UW*?t)A(PKRy$W{`@Lw8L5NHCJ5s2Ek at ob~f8n>NJD;16PmYz+
z`rGnCo6txd#WhuU52V9YjU5UxeG!oq80xXG)0)QUF+LEoSkiqn)(MmTPT}r!#y%#+
zR at 2p<V=jKd;1#RgJK8dfy%+U7>Ni>Rn$Ly^gw7#P*oplnl(&3Owo#AR`5OB at Q~DiK
z5ytbY;>%~^w5JS<)!N<7X*m*f4DZiGm+~)`t|y~w!+!r}|NB^#_I3B~bjnoATDrML
i>Z$uV<~q4bZj<k6%t_u&-c)?ycV>LO at Ne}=*8c%%jt^@9
literal 0
HcmV?d00001
>From 21119e5ae7529285662c0e9dc6c0024e07a5899b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:19:33 +0530
Subject: [PATCH 05/32] group with other floating point ops
---
clang/include/clang/CIR/Dialect/IR/CIROps.td | 41 ++--------
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 4 +-
.../test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 80 +++++--------------
3 files changed, 31 insertions(+), 94 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 2dc71c68f8a94..dc9e3c6a486d6 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -3275,39 +3275,6 @@ def CIR_InlineAsmOp : CIR_Op<"asm", [RecursiveMemoryEffects]> {
let hasCustomAssemblyFormat = 1;
}
-//===----------------------------------------------------------------------===//
-// SqrtOp
-//===----------------------------------------------------------------------===//
-
-def CIR_SqrtOp : CIR_Op<"sqrt", [Pure]> {
- let summary = "Floating-point square root";
-
- let description = [{
- The `cir.sqrt` operation computes the element-wise square root of its input.
-
- The input must be either:
- • a floating-point scalar type, or
- • a vector whose element type is floating-point.
-
- The result type must match the input type exactly.
-
- Examples:
- // scalar
- %r = cir.sqrt %x : !cir.fp64
-
- // vector
- %v = cir.sqrt %vec : !cir.vector<!cir.fp32 x 4>
- }];
-
- // input and output types: float or vector-of-float
- let arguments = (ins CIR_AnyFloatOrVecOfFloatType:$input);
- let results = (outs CIR_AnyFloatOrVecOfFloatType:$result);
-
- let assemblyFormat = [{
- $input `:` type($input) attr-dict
- }];
-}
-
//===----------------------------------------------------------------------===//
// UnreachableOp
//===----------------------------------------------------------------------===//
@@ -4664,6 +4631,14 @@ def CIR_PtrDiffOp : CIR_Op<"ptr_diff", [Pure, SameTypeOperands]> {
// Floating Point Ops
//===----------------------------------------------------------------------===//
+def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
+ let summary = "Floating-point square root operation";
+
+ let description = [{
+ Computes the square root of a floating-point value or vector.
+ }];
+}
+
class CIR_UnaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
: CIR_Op<mnemonic, [Pure, SameOperandsAndResultType]>
{
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index f8a139ec7a8e0..35ba0f48ce6d8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -781,14 +781,14 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtsh_round_mask:
case X86::BI__builtin_ia32_sqrtsd_round_mask:
case X86::BI__builtin_ia32_sqrtss_round_mask:
- errorNYI("masked round sqrt builtins");
- return {};
case X86::BI__builtin_ia32_sqrtpd256:
case X86::BI__builtin_ia32_sqrtpd:
case X86::BI__builtin_ia32_sqrtps256:
case X86::BI__builtin_ia32_sqrtps:
case X86::BI__builtin_ia32_sqrtph256:
case X86::BI__builtin_ia32_sqrtph:
+ errorNYI("Unimplemented builtin");
+ return {};
case X86::BI__builtin_ia32_sqrtph512:
case X86::BI__builtin_ia32_sqrtps512:
case X86::BI__builtin_ia32_sqrtpd512: {
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
index ef5cb954e3efe..97993cabf0ebf 100644
--- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -1,67 +1,29 @@
-// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtph, etc.)
-// RUN: %clang_cc1 -fclangir -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -emit-cir %s -o - | FileCheck %s
-
#include <immintrin.h>
+// Test X86-specific sqrt builtins
-// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
-__m128 test_sqrtps(__m128 x) {
- return __builtin_ia32_sqrtps(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
-__m256 test_sqrtps256(__m256 x) {
- return __builtin_ia32_sqrtps256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
-__m512 test_sqrtps512(__m512 x) {
- return __builtin_ia32_sqrtps512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps512
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
-__m128d test_sqrtpd(__m128d x) {
- return __builtin_ia32_sqrtpd(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
-__m256d test_sqrtpd256(__m256d x) {
- return __builtin_ia32_sqrtpd256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
-__m512d test_sqrtpd512(__m512d x) {
- return __builtin_ia32_sqrtpd512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd512
-// CHECK: cir.sqrt
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
-// Test __builtin_ia32_sqrtph - half precision vector sqrt (128-bit)
-__m128h test_sqrtph(__m128h x) {
- return __builtin_ia32_sqrtph(x);
+// Test __builtin_ia32_sqrtph512
+__m512h test_sqrtph512(__m512h a) {
+ return __builtin_ia32_sqrtph512(a);
}
-// CHECK-LABEL: cir.func @test_sqrtph
-// CHECK: cir.sqrt
+// CHECK: cir.func @test_sqrtph512
+// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.f16 x 32>
+// CHECK: cir.return [[RES]]
-// Test __builtin_ia32_sqrtph256 - half precision vector sqrt (256-bit)
-__m256h test_sqrtph256(__m256h x) {
- return __builtin_ia32_sqrtph256(x);
+// Test __builtin_ia32_sqrtps512
+__m512 test_sqrtps512(__m512 a) {
+ return __builtin_ia32_sqrtps512(a);
}
-// CHECK-LABEL: cir.func @test_sqrtph256
-// CHECK: cir.sqrt
+// CHECK: cir.func @test_sqrtps512
+// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.float x 16>
+// CHECK: cir.return [[RES]]
-// Test __builtin_ia32_sqrtph512 - half precision vector sqrt (512-bit)
-__m512h test_sqrtph512(__m512h x) {
- return __builtin_ia32_sqrtph512(x);
+// Test __builtin_ia32_sqrtpd512
+__m512d test_sqrtpd512(__m512d a) {
+ return __builtin_ia32_sqrtpd512(a);
}
-// CHECK-LABEL: cir.func @test_sqrtph512
-// CHECK: cir.sqrt
\ No newline at end of file
+// CHECK: cir.func @test_sqrtpd512
+// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.double x 8>
+// CHECK: cir.return [[RES]]
\ No newline at end of file
>From 90878ec8d8d6b5b46286c419c4187f01215b6e4b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:25:20 +0530
Subject: [PATCH 06/32] place the implementation with other floating point ops
---
clang/include/clang/CIR/Dialect/IR/CIROps.td | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index dc9e3c6a486d6..fa10848f4397a 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4631,14 +4631,6 @@ def CIR_PtrDiffOp : CIR_Op<"ptr_diff", [Pure, SameTypeOperands]> {
// Floating Point Ops
//===----------------------------------------------------------------------===//
-def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
- let summary = "Floating-point square root operation";
-
- let description = [{
- Computes the square root of a floating-point value or vector.
- }];
-}
-
class CIR_UnaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
: CIR_Op<mnemonic, [Pure, SameOperandsAndResultType]>
{
@@ -4650,6 +4642,14 @@ class CIR_UnaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
let llvmOp = llvmOpName;
}
+def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
+ let summary = "Floating-point square root operation";
+
+ let description = [{
+ Computes the square root of a floating-point value or vector.
+ }];
+}
+
def CIR_ACosOp : CIR_UnaryFPToFPBuiltinOp<"acos", "ACosOp"> {
let summary = "Computes the arcus cosine of the specified value";
let description = [{
>From 3529f40b48025f543a2b3ced9d6aa63a2241283f Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:30:58 +0530
Subject: [PATCH 07/32] Update
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5514a4cd0876d..709e3026e51f1 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -45,30 +45,30 @@
using namespace cir;
using namespace llvm;
-using namespace mlir;
-static std::string getLLVMIntrinsicNameForType(Type llvmTy) {
+
+static std::string getLLVMIntrinsicNameForType(mlir::Type llvmTy) {
std::string s;
{
llvm::raw_string_ostream os(s);
llvm::Type *unused = nullptr;
os << llvmTy;
}
- if (auto vecTy = llvmTy.dyn_cast<LLVM::LLVMType>()) {
+ if (auto vecTy = llvmTy.dyn_cast<mlir::LLVM::LLVMType>()) {
}
return s;
}
// Actual lowering
-LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
+mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
- ConversionPatternRewriter &rewriter) const {
+ mlir::ConversionPatternRewriter &rewriter) const {
- Location loc = op.getLoc();
- MLIRContext *ctx = rewriter.getContext();
+ mlir::Location loc = op.getLoc();
+ mlir::MLIRContext *ctx = rewriter.getContext();
- Type cirResTy = op.getResult().getType();
- Type llvmResTy = getTypeConverter()->convertType(cirResTy);
+ mlir::Type cirResTy = op.getResult().getType();
+ mlir::Type llvmResTy = getTypeConverter()->convertType(cirResTy);
if (!llvmResTy)
return op.emitOpError(
"expected LLVM dialect result type for cir.sqrt lowering");
>From 92d0ac3ed203e38e244c0afabb5f3524d1772645 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:44:10 +0530
Subject: [PATCH 08/32] Update clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 35ba0f48ce6d8..eb9ac260f225d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -794,7 +794,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtpd512: {
mlir::Location loc = getLoc(expr->getExprLoc());
mlir::Value arg = ops[0];
- return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
+ return builder.create<cir::SqrtOp>(loc, arg.getType(), arg).getResult();
}
case X86::BI__builtin_ia32_pmuludq128:
case X86::BI__builtin_ia32_pmuludq256:
>From 0385662da0847396b4096ddad2c90bcf1c593d0f Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:45:45 +0530
Subject: [PATCH 09/32] Update
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 709e3026e51f1..a80103764a60a 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -51,7 +51,6 @@ static std::string getLLVMIntrinsicNameForType(mlir::Type llvmTy) {
std::string s;
{
llvm::raw_string_ostream os(s);
- llvm::Type *unused = nullptr;
os << llvmTy;
}
if (auto vecTy = llvmTy.dyn_cast<mlir::LLVM::LLVMType>()) {
>From ddcb7b8476e796d9945bbde26a39e567853da34e Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:51:05 +0530
Subject: [PATCH 10/32] Update
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5514a4cd0876d..c17980f7ffbf7 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -54,8 +54,6 @@ static std::string getLLVMIntrinsicNameForType(Type llvmTy) {
llvm::Type *unused = nullptr;
os << llvmTy;
}
- if (auto vecTy = llvmTy.dyn_cast<LLVM::LLVMType>()) {
- }
return s;
}
>From 233efad67b9677f6e77034e6868905070708765a Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:56:02 +0530
Subject: [PATCH 11/32] Update
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index be6a380372efe..1f69b7d66f25e 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -12,7 +12,6 @@
#ifndef CLANG_CIR_LOWERTOLLVM_H
#define CLANG_CIR_LOWERTOLLVM_H
-#include "mlir/Conversion/PatternRewriter.h"
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Transforms/DialectConversion.h"
>From 9d940bc80e60470e6f5dcc82d74e45dd361acdc2 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:57:44 +0530
Subject: [PATCH 12/32] Update
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index ebb41e42a2871..5dfef939126d0 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -120,8 +120,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
if (!module.lookupSymbol<LLVM::LLVMFuncOp>(intrinsicName)) {
OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPointToStart(module.getBody());
- auto llvmFnType = LLVM::LLVMType::getFunctionTy(llvmResTy, {llvmResTy},
- /*isVarArg=*/false);
+ auto llvmFnType = LLVM::LLVMFunctionType::get(ctx, llvmResTy, {llvmResTy},
+ /*isVarArg=*/false);
rewriter.create<LLVM::LLVMFuncOp>(loc, intrinsicName, llvmFnType);
}
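For context: the LLVM::LLVMType wrapper this replaces was removed from MLIR
some time ago; function types in the LLVM dialect are built as first-class
types now. The call as patched (note that PATCH 20 below deletes this whole
declaration dance in favor of emitting the dialect op directly):

    auto llvmFnType = LLVM::LLVMFunctionType::get(ctx, llvmResTy, {llvmResTy},
                                                  /*isVarArg=*/false);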
>From 51bbccad4f784a4c44d6562ccef36caaf2f1b521 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 16:18:27 +0530
Subject: [PATCH 13/32] Remove BOM character
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index ebb41e42a2871..0395f905c866b 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//===-- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
>From e5789b65fc43637493e07979a9ac56dfd9cbee37 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 16:29:05 +0530
Subject: [PATCH 14/32] Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5dfef939126d0..11f042737d658 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
>From 8937b12959c7a4336b6857c1a57b54e6c99d5457 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 16:31:14 +0530
Subject: [PATCH 15/32] Apply suggestion from @andykaylor
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index eb9ac260f225d..35ba0f48ce6d8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -794,7 +794,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtpd512: {
mlir::Location loc = getLoc(expr->getExprLoc());
mlir::Value arg = ops[0];
- return builder.create<cir::SqrtOp>(loc, arg.getType(), arg).getResult();
+ return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
}
case X86::BI__builtin_ia32_pmuludq128:
case X86::BI__builtin_ia32_pmuludq256:
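The back-and-forth between PATCH 08 and this patch reflects MLIR's ongoing
builder-API migration: the static Op::create(builder, ...) spelling is the
direction new code is moving, while builder.create<Op>(...) is the older
form. Side by side, assuming a builder, location, and operand in scope:

    // Preferred: static create on the op class.
    mlir::Value v =
        cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
    // Older spelling that PATCH 08 briefly reintroduced.
    mlir::Value w =
        builder.create<cir::SqrtOp>(loc, arg.getType(), arg).getResult();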
>From 8a02c504acf42c81bd0c53df89e296480b74c05b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 16:44:08 +0530
Subject: [PATCH 16/32] Add description
---
clang/include/clang/CIR/Dialect/IR/CIROps.td | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index fa10848f4397a..06eb7d6689362 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4647,6 +4647,19 @@ def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
let description = [{
Computes the square root of a floating-point value or vector.
+
+ The input must be either:
+ • a floating-point scalar type, or
+ • a vector whose element type is floating-point.
+
+ The result type must match the input type exactly.
+
+ Examples:
+ // scalar
+ %r = cir.sqrt %x : !cir.fp64
+
+ // vector
+ %v = cir.sqrt %vec : !cir.vector<!cir.fp32 x 4>
}];
}
>From 82a9395517d79c79653194939c107234e1628d05 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 17:17:40 +0530
Subject: [PATCH 17/32] Remove undefined sqrt builtin cases
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 6 ------
1 file changed, 6 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 35ba0f48ce6d8..0b796e4e3a860 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -781,12 +781,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtsh_round_mask:
case X86::BI__builtin_ia32_sqrtsd_round_mask:
case X86::BI__builtin_ia32_sqrtss_round_mask:
- case X86::BI__builtin_ia32_sqrtpd256:
- case X86::BI__builtin_ia32_sqrtpd:
- case X86::BI__builtin_ia32_sqrtps256:
- case X86::BI__builtin_ia32_sqrtps:
- case X86::BI__builtin_ia32_sqrtph256:
- case X86::BI__builtin_ia32_sqrtph:
errorNYI("Unimplemented builtin");
return {};
case X86::BI__builtin_ia32_sqrtph512:
>From 6bd328210bcd68abe14e36895f6d587a54b99ed2 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 17:25:40 +0530
Subject: [PATCH 18/32] Remove unused getLLVMIntrinsicNameForType function
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5dfef939126d0..8b8b756a7f691 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -46,16 +46,6 @@
using namespace cir;
using namespace llvm;
-
-static std::string getLLVMIntrinsicNameForType(mlir::Type llvmTy) {
- std::string s;
- {
- llvm::raw_string_ostream os(s);
- os << llvmTy;
- }
- return s;
-}
-
// Actual lowering
mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
>From 8232ce8a4de0e8c179d42739b354695987be458f Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 17:37:11 +0530
Subject: [PATCH 19/32] Removed braces
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 8b8b756a7f691..4cbea38a30c50 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -62,9 +62,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
Value operand = adaptor.getInput();
Value llvmOperand = operand;
- if (operand.getType() != llvmResTy) {
+ if (operand.getType() != llvmResTy)
llvmOperand = rewriter.create<LLVM::BitcastOp>(loc, llvmResTy, operand);
- }
// Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result
std::string intrinsicName = "llvm.sqrt.";
@@ -84,9 +83,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
suffix = "v" + std::to_string(n) + "f16";
else
return op.emitOpError("unsupported float width for sqrt");
- } else {
+ } else
return op.emitOpError("vector element must be floating point for sqrt");
- }
} else if (auto f = cirResTy.dyn_cast<cir::FloatType>()) {
// Scalar float
unsigned width = f.getWidth();
@@ -98,9 +96,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
suffix = "f16";
else
return op.emitOpError("unsupported float width for sqrt");
- } else {
+ } else
return op.emitOpError("unsupported type for cir.sqrt lowering");
- }
intrinsicName += suffix;
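This follows the LLVM Coding Standards rule that simple single-statement
bodies omit braces; for example, from the hunk above:

    if (operand.getType() != llvmResTy)
      llvmOperand = rewriter.create<LLVM::BitcastOp>(loc, llvmResTy, operand);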
>From bc8e4ccfc22731aaee790659d8b5072ab36be7a7 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 17:39:45 +0530
Subject: [PATCH 20/32] Update
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 74 +------------------
1 file changed, 4 insertions(+), 70 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 4cbea38a30c50..e7a6e8677569a 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -47,77 +47,11 @@ using namespace cir;
using namespace llvm;
// Actual lowering
-mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
- cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
- mlir::ConversionPatternRewriter &rewriter) const {
-
- mlir::Location loc = op.getLoc();
- mlir::MLIRContext *ctx = rewriter.getContext();
-
- mlir::Type cirResTy = op.getResult().getType();
- mlir::Type llvmResTy = getTypeConverter()->convertType(cirResTy);
- if (!llvmResTy)
- return op.emitOpError(
- "expected LLVM dialect result type for cir.sqrt lowering");
-
- Value operand = adaptor.getInput();
- Value llvmOperand = operand;
- if (operand.getType() != llvmResTy)
- llvmOperand = rewriter.create<LLVM::BitcastOp>(loc, llvmResTy, operand);
-
- // Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result
- std::string intrinsicName = "llvm.sqrt.";
- std::string suffix;
-
- // If the CIR result type is a vector, include the 'vN' part in the suffix.
- if (auto vec = cirResTy.dyn_cast<cir::VectorType>()) {
- Type elt = vec.getElementType();
- if (auto f = elt.dyn_cast<cir::FloatType>()) {
- unsigned width = f.getWidth();
- unsigned n = vec.getNumElements();
- if (width == 32)
- suffix = "v" + std::to_string(n) + "f32";
- else if (width == 64)
- suffix = "v" + std::to_string(n) + "f64";
- else if (width == 16)
- suffix = "v" + std::to_string(n) + "f16";
- else
- return op.emitOpError("unsupported float width for sqrt");
- } else
- return op.emitOpError("vector element must be floating point for sqrt");
- } else if (auto f = cirResTy.dyn_cast<cir::FloatType>()) {
- // Scalar float
- unsigned width = f.getWidth();
- if (width == 32)
- suffix = "f32";
- else if (width == 64)
- suffix = "f64";
- else if (width == 16)
- suffix = "f16";
- else
- return op.emitOpError("unsupported float width for sqrt");
- } else
- return op.emitOpError("unsupported type for cir.sqrt lowering");
-
- intrinsicName += suffix;
-
- // Ensure the llvm intrinsic function exists at module scope. Insert it at
- // the start of the module body using an insertion guard.
- ModuleOp module = op->getParentOfType<ModuleOp>();
- if (!module.lookupSymbol<LLVM::LLVMFuncOp>(intrinsicName)) {
- OpBuilder::InsertionGuard guard(rewriter);
- rewriter.setInsertionPointToStart(module.getBody());
- auto llvmFnType = LLVM::LLVMFunctionType::get(ctx, llvmResTy, {llvmResTy},
- /*isVarArg=*/false);
- rewriter.create<LLVM::LLVMFuncOp>(loc, intrinsicName, llvmFnType);
- }
-
- // Create the call and replace cir.sqrt
- auto callee = SymbolRefAttr::get(ctx, intrinsicName);
- rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, llvmResTy, callee,
- ArrayRef<Value>{llvmOperand});
-
+ mlir::Type resTy = typeConverter->convertType(op.getType());
+ rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy,
+ adaptor.getSrc());
return mlir::success();
+
}
namespace cir {
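Note that as applied this hunk leaves the replacement body floating without an
enclosing function; PATCH 26 below moves it into SqrtOpLowering::matchAndRewrite.
The net result is worth spelling out, since it is the point of the review:
instead of assembling llvm.sqrt.* intrinsic names by hand and declaring them
at module scope, the lowering emits the LLVM dialect op and lets the LLVM IR
translation pick the right overload per element type and vector shape. A
sketch mirroring PATCH 26:

    mlir::LogicalResult SqrtOpLowering::matchAndRewrite(
        cir::SqrtOp op, OpAdaptor adaptor,
        mlir::ConversionPatternRewriter &rewriter) const {
      // Convert the CIR result type to its LLVM dialect equivalent.
      mlir::Type resTy = typeConverter->convertType(op.getType());
      // mlir::LLVM::SqrtOp models the sqrt intrinsic; translation to LLVM IR
      // selects @llvm.sqrt.f32/.v16f32/etc. from the operand type.
      rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy,
                                                      adaptor.getSrc());
      return mlir::success();
    }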
>From 92847619f4b000f6dcefe307543dcf6e7b917a14 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 19:37:22 +0530
Subject: [PATCH 21/32] Update
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 13 -------------
1 file changed, 13 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 1f69b7d66f25e..0591de545b81d 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -17,19 +17,6 @@
#include "mlir/Transforms/DialectConversion.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
-namespace cir {
-class SqrtOp;
-}
-
-class CIRToLLVMSqrtOpLowering : public mlir::OpConversionPattern<cir::SqrtOp> {
-public:
- using mlir::OpConversionPattern<cir::SqrtOp>::OpConversionPattern;
-
- mlir::LogicalResult
- matchAndRewrite(cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
- mlir::ConversionPatternRewriter &rewriter) const override;
-};
-
namespace cir {
namespace direct {
>From 8647b5c719a7d91c3dbd3954b022621c3b550aaf Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 19:41:21 +0530
Subject: [PATCH 22/32] Update clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
index 97993cabf0ebf..bf496f2ea733d 100644
--- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -2,7 +2,11 @@
// Test X86-specific sqrt builtins
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
-// RUN: FileCheck --input-file=%t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
// Test __builtin_ia32_sqrtph512
__m512h test_sqrtph512(__m512h a) {
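The added RUN lines follow the usual ClangIR test convention: the CIR prefix
checks the emitted CIR dialect, LLVM checks the LLVM IR produced through the
ClangIR pipeline, and OGCG checks the LLVM IR from the original (non-CIR)
Clang codegen, so the two pipelines can be compared check-for-check.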
>From 4bac65a58020456624d39efb64f27d1301c4bb23 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 19:48:11 +0530
Subject: [PATCH 23/32] Update test
---
.../test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 30 +++++++++++++------
1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
index bf496f2ea733d..a3de192f9e142 100644
--- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -12,22 +12,34 @@
__m512h test_sqrtph512(__m512h a) {
return __builtin_ia32_sqrtph512(a);
}
-// CHECK: cir.func @test_sqrtph512
-// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.fp16 x 32>
-// CHECK: cir.return [[RES]]
+// CIR: cir.func @test_sqrtph512
+// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.fp16 x 32>
+// CIR: cir.return [[RES]]
+// LLVM: define {{.*}} @test_sqrtph512
+// LLVM: call <32 x half> @llvm.sqrt.v32f16
+// OGCG: define {{.*}} @test_sqrtph512
+// OGCG: call <32 x half> @llvm.sqrt.v32f16
// Test __builtin_ia32_sqrtps512
__m512 test_sqrtps512(__m512 a) {
return __builtin_ia32_sqrtps512(a);
}
-// CHECK: cir.func @test_sqrtps512
-// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.float x 16>
-// CHECK: cir.return [[RES]]
+// CIR: cir.func @test_sqrtps512
+// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.float x 16>
+// CIR: cir.return [[RES]]
+// LLVM: define {{.*}} @test_sqrtps512
+// LLVM: call <16 x float> @llvm.sqrt.v16f32
+// OGCG: define {{.*}} @test_sqrtps512
+// OGCG: call <16 x float> @llvm.sqrt.v16f32
// Test __builtin_ia32_sqrtpd512
__m512d test_sqrtpd512(__m512d a) {
return __builtin_ia32_sqrtpd512(a);
}
-// CHECK: cir.func @test_sqrtpd512
-// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.double x 8>
-// CHECK: cir.return [[RES]]
\ No newline at end of file
+// CIR: cir.func @test_sqrtpd512
+// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.double x 8>
+// CIR: cir.return [[RES]]
+// LLVM: define {{.*}} @test_sqrtpd512
+// LLVM: call <8 x double> @llvm.sqrt.v8f64
+// OGCG: define {{.*}} @test_sqrtpd512
+// OGCG: call <8 x double> @llvm.sqrt.v8f64
\ No newline at end of file
>From b1ff2abd50b55470361f721053fc72a9080c20d6 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 20:25:55 +0530
Subject: [PATCH 24/32] Update
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index e7a6e8677569a..846fc5c07f798 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
>From 884300615cd4900e44af48016cd895005821e41f Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Fri, 5 Dec 2025 19:06:33 +0530
Subject: [PATCH 25/32] Remove unused include
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 846fc5c07f798..08573c0ae83bb 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -30,7 +30,6 @@
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "mlir/Transforms/DialectConversion.h"
-#include "clang/Basic/LLVM.h"
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
>From ed82423252e2b452efc4d3265166c08e797b259e Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Fri, 5 Dec 2025 19:19:21 +0530
Subject: [PATCH 26/32] Move sqrt lowering with other floating-point operations
---
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 08573c0ae83bb..34a1ca3f10c01 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -45,14 +45,6 @@
using namespace cir;
using namespace llvm;
-// Actual lowering
- mlir::Type resTy = typeConverter->convertType(op.getType());
- rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy,
- adaptor.getSrc());
- return mlir::success();
-
-}
-
namespace cir {
namespace direct {
@@ -194,6 +186,14 @@ mlir::LogicalResult CIRToLLVMCopyOpLowering::matchAndRewrite(
return mlir::success();
}
+mlir::LogicalResult SqrtOpLowering::matchAndRewrite(
+ cir::SqrtOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ mlir::Type resTy = typeConverter->convertType(op.getType());
+ rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy, adaptor.getSrc());
+ return mlir::success();
+}
+
mlir::LogicalResult CIRToLLVMCosOpLowering::matchAndRewrite(
cir::CosOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
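With the pattern now defined next to CIRToLLVMCosOpLowering, registration
presumably happens in the same pattern list as the other floating-point
lowerings. A minimal sketch of such a registration, assuming the usual
RewritePatternSet plumbing (the helper name here is hypothetical; the real
registration site is elsewhere in LowerToLLVM.cpp):

    // Hypothetical helper illustrating how the pattern would be registered.
    void populateSqrtLowering(const mlir::TypeConverter &converter,
                              mlir::RewritePatternSet &patterns) {
      patterns.add<SqrtOpLowering>(converter, patterns.getContext());
    }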
>From 961c9f95a70b1368b2e33adeae0aa63fd2b9ae8c Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Sat, 6 Dec 2025 09:31:10 +0530
Subject: [PATCH 27/32] Remove BOM character
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 34a1ca3f10c01..ffaebdcd9f062 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
>From 4dd8aa07aba47bbfdaf39ef36c5f4f951fb7673a Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Sat, 6 Dec 2025 11:41:11 +0530
Subject: [PATCH 28/32] Delete my-sqrt-changes.patch
---
my-sqrt-changes.patch | Bin 12058 -> 0 bytes
1 file changed, 0 insertions(+), 0 deletions(-)
delete mode 100644 my-sqrt-changes.patch
diff --git a/my-sqrt-changes.patch b/my-sqrt-changes.patch
deleted file mode 100644
index 87c0ca69ac8abe6aaa684ffbbce3c65e342f6066..0000000000000000000000000000000000000000
>From cc5ffa1eee82b86f897ac3cabed6f5b39f28ed61 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Sun, 7 Dec 2025 17:01:19 +0530
Subject: [PATCH 29/32] Update errorNYI call
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 3140 ++++++++++----------
my-sqrt-changes.patch | Bin 12058 -> 0 bytes
2 files changed, 1571 insertions(+), 1569 deletions(-)
delete mode 100644 my-sqrt-changes.patch
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index d99dfad0f6c45..080a696b868cf 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1,1569 +1,1571 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This contains code to emit x86/x86_64 Builtin calls as CIR or a function
-// call to be later resolved.
-//
-//===----------------------------------------------------------------------===//
-
-#include "CIRGenBuilder.h"
-#include "CIRGenFunction.h"
-#include "CIRGenModule.h"
-#include "mlir/IR/Location.h"
-#include "mlir/IR/ValueRange.h"
-#include "clang/Basic/Builtins.h"
-#include "clang/Basic/TargetBuiltins.h"
-#include "clang/CIR/Dialect/IR/CIRTypes.h"
-#include "clang/CIR/MissingFeatures.h"
-
-using namespace clang;
-using namespace clang::CIRGen;
-
-template <typename... Operands>
-static mlir::Value emitIntrinsicCallOp(CIRGenBuilderTy &builder,
- mlir::Location loc, const StringRef str,
- const mlir::Type &resTy,
- Operands &&...op) {
- return cir::LLVMIntrinsicCallOp::create(builder, loc,
- builder.getStringAttr(str), resTy,
- std::forward<Operands>(op)...)
- .getResult();
-}
-
-// OG has unordered comparison as a form of optimization in addition to
-// ordered comparison, while CIR doesn't.
-//
-// This means that we can't encode the comparison code of UGT (unordered
-// greater than), at least not at the CIR level.
-//
-// The boolean shouldInvert compensates for this.
-// For example: to get to the comparison code UGT, we pass in
-// emitVectorFCmp (OLE, shouldInvert = true) since OLE is the inverse of UGT.
-
-// There are several ways to support this otherwise:
-// - register extra CmpOpKind for unordered comparison types and build the
-// translation code for
-// to go from CIR -> LLVM dialect. Notice we get this naturally with
-// shouldInvert, benefiting from existing infrastructure, albeit having to
-// generate an extra `not` at CIR).
-// - Just add extra comparison code to a new VecCmpOpKind instead of
-// cluttering CmpOpKind.
-// - Add a boolean in VecCmpOp to indicate if it's doing unordered or ordered
-// comparison
-// - Just emit the intrinsics call instead of calling this helper, see how the
-// LLVM lowering handles this.
-static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
- llvm::SmallVector<mlir::Value> &ops,
- mlir::Location loc, cir::CmpOpKind pred,
- bool shouldInvert) {
- assert(!cir::MissingFeatures::cgFPOptionsRAII());
- // TODO(cir): Add isSignaling boolean once emitConstrainedFPCall implemented
- assert(!cir::MissingFeatures::emitConstrainedFPCall());
- mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]);
- mlir::Value bitCast = builder.createBitcast(
- shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType());
- return bitCast;
-}
-
-static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc,
- mlir::Value mask, unsigned numElems) {
- auto maskTy = cir::VectorType::get(
- builder.getUIntNTy(1), cast<cir::IntType>(mask.getType()).getWidth());
- mlir::Value maskVec = builder.createBitcast(mask, maskTy);
-
- // If we have less than 8 elements, then the starting mask was an i8 and
- // we need to extract down to the right number of elements.
- if (numElems < 8) {
- SmallVector<mlir::Attribute, 4> indices;
- mlir::Type i32Ty = builder.getSInt32Ty();
- for (auto i : llvm::seq<unsigned>(0, numElems))
- indices.push_back(cir::IntAttr::get(i32Ty, i));
-
- maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices);
- }
- return maskVec;
-}
-
-// Builds the VecShuffleOp for pshuflw and pshufhw x86 builtins.
-//
-// The vector is split into lanes of 8 word elements (16 bits). The lower or
-// upper half of each lane, controlled by `isLow`, is shuffled in the following
-// way: The immediate is truncated to 8 bits, separated into 4 2-bit fields. The
-// i-th field's value represents the resulting index of the i-th element in the
-// half lane after shuffling. The other half of the lane remains unchanged.
-static cir::VecShuffleOp emitPshufWord(CIRGenBuilderTy &builder,
- const mlir::Value vec,
- const mlir::Value immediate,
- const mlir::Location loc,
- const bool isLow) {
- uint32_t imm = CIRGenFunction::getZExtIntValueFromConstOp(immediate);
-
- auto vecTy = cast<cir::VectorType>(vec.getType());
- unsigned numElts = vecTy.getSize();
-
- unsigned firstHalfStart = isLow ? 0 : 4;
- unsigned secondHalfStart = 4 - firstHalfStart;
-
- // Splat the 8-bits of immediate 4 times to help the loop wrap around.
- imm = (imm & 0xff) * 0x01010101;
-
- int64_t indices[32];
- for (unsigned l = 0; l != numElts; l += 8) {
- for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) {
- indices[l + i] = l + (imm & 3) + firstHalfStart;
- imm >>= 2;
- }
- for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i)
- indices[l + i] = l + i;
- }
-
- return builder.createVecShuffle(loc, vec, ArrayRef(indices, numElts));
-}
-
-// Builds the shuffle mask for pshufd and shufpd/shufps x86 builtins.
-// The shuffle mask is written to outIndices.
-static void
-computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec,
- uint32_t imm, const bool isShufP,
- llvm::SmallVectorImpl<int64_t> &outIndices) {
- auto vecTy = cast<cir::VectorType>(vec.getType());
- unsigned numElts = vecTy.getSize();
- unsigned numLanes = cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128;
- unsigned numLaneElts = numElts / numLanes;
-
- // Splat the 8-bits of immediate 4 times to help the loop wrap around.
- imm = (imm & 0xff) * 0x01010101;
-
- for (unsigned l = 0; l != numElts; l += numLaneElts) {
- for (unsigned i = 0; i != numLaneElts; ++i) {
- uint32_t idx = imm % numLaneElts;
- imm /= numLaneElts;
- if (isShufP && i >= (numLaneElts / 2))
- idx += numElts;
- outIndices[l + i] = l + idx;
- }
- }
-
- outIndices.resize(numElts);
-}
-
-static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
- mlir::Location loc,
- const std::string &intrinsicName,
- SmallVectorImpl<mlir::Value> &ops) {
-
- auto intTy = cast<cir::IntType>(ops[0].getType());
- unsigned numElts = intTy.getWidth();
- mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
- mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
- mlir::Type vecTy = lhsVec.getType();
- mlir::Value resVec = emitIntrinsicCallOp(builder, loc, intrinsicName, vecTy,
- mlir::ValueRange{lhsVec, rhsVec});
- return builder.createBitcast(resVec, ops[0].getType());
-}
-
-static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder,
- mlir::Location loc,
- const std::string &intrinsicName,
- SmallVectorImpl<mlir::Value> &ops) {
- unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
-
- // Convert both operands to mask vectors.
- mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems);
- mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems);
-
- mlir::Type i32Ty = builder.getSInt32Ty();
-
- // Create indices for extracting the first half of each vector.
- SmallVector<mlir::Attribute, 32> halfIndices;
- for (auto i : llvm::seq<unsigned>(0, numElems / 2))
- halfIndices.push_back(cir::IntAttr::get(i32Ty, i));
-
- // Extract first half of each vector. This gives better codegen than
- // doing it in a single shuffle.
- mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices);
- mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices);
-
- // Create indices for concatenating the vectors.
- // NOTE: Operands are swapped to match the intrinsic definition.
- // After the half extraction, both vectors have numElems/2 elements.
- // In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1]
- // select from rhsHalf, and indices [numElems/2..numElems-1] select from
- // lhsHalf.
- SmallVector<mlir::Attribute, 64> concatIndices;
- for (auto i : llvm::seq<unsigned>(0, numElems))
- concatIndices.push_back(cir::IntAttr::get(i32Ty, i));
-
- // Concat the vectors (RHS first, then LHS).
- mlir::Value res =
- builder.createVecShuffle(loc, rhsHalf, lhsHalf, concatIndices);
- return builder.createBitcast(res, ops[0].getType());
-}
-
-static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
- mlir::Location loc,
- cir::BinOpKind binOpKind,
- SmallVectorImpl<mlir::Value> &ops,
- bool invertLHS = false) {
- unsigned numElts = cast<cir::IntType>(ops[0].getType()).getWidth();
- mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElts);
- mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElts);
-
- if (invertLHS)
- lhs = builder.createNot(lhs);
- return builder.createBitcast(builder.createBinop(loc, lhs, binOpKind, rhs),
- ops[0].getType());
-}
-
-static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc,
- const std::string &intrinsicName,
- SmallVectorImpl<mlir::Value> &ops) {
- auto intTy = cast<cir::IntType>(ops[0].getType());
- unsigned numElts = intTy.getWidth();
- mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
- mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
- mlir::Type resTy = builder.getSInt32Ty();
- return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy,
- mlir::ValueRange{lhsVec, rhsVec});
-}
-
-static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
- mlir::Value vec, mlir::Value value,
- mlir::Value indexOp) {
- unsigned numElts = cast<cir::VectorType>(vec.getType()).getSize();
-
- uint64_t index =
- indexOp.getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
-
- index &= numElts - 1;
-
- cir::ConstantOp indexVal = builder.getUInt64(index, loc);
-
- return cir::VecInsertOp::create(builder, loc, vec, value, indexVal);
-}
-
-static mlir::Value emitX86FunnelShift(CIRGenBuilderTy &builder,
- mlir::Location location, mlir::Value &op0,
- mlir::Value &op1, mlir::Value &amt,
- bool isRight) {
- mlir::Type op0Ty = op0.getType();
-
- // Amount may be scalar immediate, in which case create a splat vector.
- // Funnel shifts amounts are treated as modulo and types are all power-of-2
- // so we only care about the lowest log2 bits anyway.
- if (amt.getType() != op0Ty) {
- auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
- uint64_t numElems = vecTy.getSize();
-
- auto amtTy = mlir::cast<cir::IntType>(amt.getType());
- auto vecElemTy = mlir::cast<cir::IntType>(vecTy.getElementType());
-
- // If signed, cast to the same width but unsigned first to
- // ensure zero-extension when casting to a bigger unsigned `vecElemeTy`.
- if (amtTy.isSigned()) {
- cir::IntType unsignedAmtTy = builder.getUIntNTy(amtTy.getWidth());
- amt = builder.createIntCast(amt, unsignedAmtTy);
- }
- cir::IntType unsignedVecElemType = builder.getUIntNTy(vecElemTy.getWidth());
- amt = builder.createIntCast(amt, unsignedVecElemType);
- amt = cir::VecSplatOp::create(
- builder, location, cir::VectorType::get(unsignedVecElemType, numElems),
- amt);
- }
-
- const StringRef intrinsicName = isRight ? "fshr" : "fshl";
- return emitIntrinsicCallOp(builder, location, intrinsicName, op0Ty,
- mlir::ValueRange{op0, op1, amt});
-}
-
-static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
- bool isSigned,
- SmallVectorImpl<mlir::Value> &ops,
- unsigned opTypePrimitiveSizeInBits) {
- mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(),
- opTypePrimitiveSizeInBits / 64);
- mlir::Value lhs = builder.createBitcast(loc, ops[0], ty);
- mlir::Value rhs = builder.createBitcast(loc, ops[1], ty);
- if (isSigned) {
- cir::ConstantOp shiftAmt =
- builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32));
- cir::VecSplatOp shiftSplatVecOp =
- cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult());
- mlir::Value shiftSplatValue = shiftSplatVecOp.getResult();
- // In CIR, right-shift operations are automatically lowered to either an
- // arithmetic or logical shift depending on the operand type. The purpose
- // of the shifts here is to propagate the sign bit of the 32-bit input
- // into the upper bits of each vector lane.
- lhs = builder.createShift(loc, lhs, shiftSplatValue, true);
- lhs = builder.createShift(loc, lhs, shiftSplatValue, false);
- rhs = builder.createShift(loc, rhs, shiftSplatValue, true);
- rhs = builder.createShift(loc, rhs, shiftSplatValue, false);
- } else {
- cir::ConstantOp maskScalar = builder.getConstant(
- loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff));
- cir::VecSplatOp mask =
- cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult());
- // Clear the upper bits
- lhs = builder.createAnd(loc, lhs, mask);
- rhs = builder.createAnd(loc, rhs, mask);
- }
- return builder.createMul(loc, lhs, rhs);
-}
-
-static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
- llvm::SmallVector<mlir::Value> ops,
- bool isSigned) {
- mlir::Value op0 = ops[0];
- mlir::Value op1 = ops[1];
-
- cir::VectorType ty = cast<cir::VectorType>(op0.getType());
- cir::IntType elementTy = cast<cir::IntType>(ty.getElementType());
-
- uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7;
-
- cir::CmpOpKind pred;
- switch (imm) {
- case 0x0:
- pred = cir::CmpOpKind::lt;
- break;
- case 0x1:
- pred = cir::CmpOpKind::le;
- break;
- case 0x2:
- pred = cir::CmpOpKind::gt;
- break;
- case 0x3:
- pred = cir::CmpOpKind::ge;
- break;
- case 0x4:
- pred = cir::CmpOpKind::eq;
- break;
- case 0x5:
- pred = cir::CmpOpKind::ne;
- break;
- case 0x6:
- return builder.getNullValue(ty, loc); // FALSE
- case 0x7: {
- llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth());
- return cir::VecSplatOp::create(
- builder, loc, ty,
- builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE
- }
- default:
- llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
- }
-
- if ((!isSigned && elementTy.isSigned()) ||
- (isSigned && elementTy.isUnsigned())) {
- elementTy = elementTy.isSigned() ? builder.getUIntNTy(elementTy.getWidth())
- : builder.getSIntNTy(elementTy.getWidth());
- ty = cir::VectorType::get(elementTy, ty.getSize());
- op0 = builder.createBitcast(op0, ty);
- op1 = builder.createBitcast(op1, ty);
- }
-
- return builder.createVecCompare(loc, pred, op0, op1);
-}
-
-mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
- const CallExpr *expr) {
- if (builtinID == Builtin::BI__builtin_cpu_is) {
- cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
- return {};
- }
- if (builtinID == Builtin::BI__builtin_cpu_supports) {
- cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports");
- return {};
- }
- if (builtinID == Builtin::BI__builtin_cpu_init) {
- cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init");
- return {};
- }
-
- // Handle MSVC intrinsics before argument evaluation to prevent double
- // evaluation.
- assert(!cir::MissingFeatures::msvcBuiltins());
-
- // Find out if any arguments are required to be integer constant expressions.
- assert(!cir::MissingFeatures::handleBuiltinICEArguments());
-
- // The operands of the builtin call
- llvm::SmallVector<mlir::Value> ops;
-
- // `ICEArguments` is a bitmap indicating whether the argument at the i-th bit
- // is required to be a constant integer expression.
- unsigned iceArguments = 0;
- ASTContext::GetBuiltinTypeError error;
- getContext().GetBuiltinType(builtinID, error, &iceArguments);
- assert(error == ASTContext::GE_None && "Error while getting builtin type.");
-
- for (auto [idx, arg] : llvm::enumerate(expr->arguments()))
- ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg));
-
- CIRGenBuilderTy &builder = getBuilder();
- mlir::Type voidTy = builder.getVoidTy();
-
- switch (builtinID) {
- default:
- return {};
- case X86::BI_mm_clflush:
- return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
- "x86.sse2.clflush", voidTy, ops[0]);
- case X86::BI_mm_lfence:
- return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
- "x86.sse2.lfence", voidTy);
- case X86::BI_mm_pause:
- return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
- "x86.sse2.pause", voidTy);
- case X86::BI_mm_mfence:
- return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
- "x86.sse2.mfence", voidTy);
- case X86::BI_mm_sfence:
- return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
- "x86.sse.sfence", voidTy);
- case X86::BI_mm_prefetch:
- case X86::BI__rdtsc:
- case X86::BI__builtin_ia32_rdtscp: {
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- }
- case X86::BI__builtin_ia32_lzcnt_u16:
- case X86::BI__builtin_ia32_lzcnt_u32:
- case X86::BI__builtin_ia32_lzcnt_u64: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- mlir::Value isZeroPoison = builder.getFalse(loc);
- return emitIntrinsicCallOp(builder, loc, "ctlz", ops[0].getType(),
- mlir::ValueRange{ops[0], isZeroPoison});
- }
- case X86::BI__builtin_ia32_tzcnt_u16:
- case X86::BI__builtin_ia32_tzcnt_u32:
- case X86::BI__builtin_ia32_tzcnt_u64: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- mlir::Value isZeroPoison = builder.getFalse(loc);
- return emitIntrinsicCallOp(builder, loc, "cttz", ops[0].getType(),
- mlir::ValueRange{ops[0], isZeroPoison});
- }
- case X86::BI__builtin_ia32_undef128:
- case X86::BI__builtin_ia32_undef256:
- case X86::BI__builtin_ia32_undef512:
- // The x86 definition of "undef" is not the same as the LLVM definition
- // (PR32176). We leave optimizing away an unnecessary zero constant to the
- // IR optimizer and backend.
- // TODO: If we had a "freeze" IR instruction to generate a fixed undef
- // value, we should use that here instead of a zero.
- return builder.getNullValue(convertType(expr->getType()),
- getLoc(expr->getExprLoc()));
- case X86::BI__builtin_ia32_vec_ext_v4hi:
- case X86::BI__builtin_ia32_vec_ext_v16qi:
- case X86::BI__builtin_ia32_vec_ext_v8hi:
- case X86::BI__builtin_ia32_vec_ext_v4si:
- case X86::BI__builtin_ia32_vec_ext_v4sf:
- case X86::BI__builtin_ia32_vec_ext_v2di:
- case X86::BI__builtin_ia32_vec_ext_v32qi:
- case X86::BI__builtin_ia32_vec_ext_v16hi:
- case X86::BI__builtin_ia32_vec_ext_v8si:
- case X86::BI__builtin_ia32_vec_ext_v4di: {
- unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
-
- uint64_t index = getZExtIntValueFromConstOp(ops[1]);
- index &= numElts - 1;
-
- cir::ConstantOp indexVal =
- builder.getUInt64(index, getLoc(expr->getExprLoc()));
-
- // These builtins exist so we can ensure the index is an ICE and in range.
- // Otherwise we could just do this in the header file.
- return cir::VecExtractOp::create(builder, getLoc(expr->getExprLoc()),
- ops[0], indexVal);
- }
- case X86::BI__builtin_ia32_vec_set_v4hi:
- case X86::BI__builtin_ia32_vec_set_v16qi:
- case X86::BI__builtin_ia32_vec_set_v8hi:
- case X86::BI__builtin_ia32_vec_set_v4si:
- case X86::BI__builtin_ia32_vec_set_v2di:
- case X86::BI__builtin_ia32_vec_set_v32qi:
- case X86::BI__builtin_ia32_vec_set_v16hi:
- case X86::BI__builtin_ia32_vec_set_v8si:
- case X86::BI__builtin_ia32_vec_set_v4di: {
- return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1],
- ops[2]);
- }
- case X86::BI__builtin_ia32_kunpckhi:
- return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kunpackb", ops);
- case X86::BI__builtin_ia32_kunpcksi:
- return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kunpackw", ops);
- case X86::BI__builtin_ia32_kunpckdi:
- return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kunpackd", ops);
- case X86::BI_mm_setcsr:
- case X86::BI__builtin_ia32_ldmxcsr: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- Address tmp = createMemTemp(expr->getArg(0)->getType(), loc);
- builder.createStore(loc, ops[0], tmp);
- return emitIntrinsicCallOp(builder, loc, "x86.sse.ldmxcsr",
- builder.getVoidTy(), tmp.getPointer());
- }
- case X86::BI_mm_getcsr:
- case X86::BI__builtin_ia32_stmxcsr: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- Address tmp = createMemTemp(expr->getType(), loc);
- emitIntrinsicCallOp(builder, loc, "x86.sse.stmxcsr", builder.getVoidTy(),
- tmp.getPointer());
- return builder.createLoad(loc, tmp);
- }
- case X86::BI__builtin_ia32_xsave:
- case X86::BI__builtin_ia32_xsave64:
- case X86::BI__builtin_ia32_xrstor:
- case X86::BI__builtin_ia32_xrstor64:
- case X86::BI__builtin_ia32_xsaveopt:
- case X86::BI__builtin_ia32_xsaveopt64:
- case X86::BI__builtin_ia32_xrstors:
- case X86::BI__builtin_ia32_xrstors64:
- case X86::BI__builtin_ia32_xsavec:
- case X86::BI__builtin_ia32_xsavec64:
- case X86::BI__builtin_ia32_xsaves:
- case X86::BI__builtin_ia32_xsaves64:
- case X86::BI__builtin_ia32_xsetbv:
- case X86::BI_xsetbv:
- case X86::BI__builtin_ia32_xgetbv:
- case X86::BI_xgetbv:
- case X86::BI__builtin_ia32_storedqudi128_mask:
- case X86::BI__builtin_ia32_storedqusi128_mask:
- case X86::BI__builtin_ia32_storedquhi128_mask:
- case X86::BI__builtin_ia32_storedquqi128_mask:
- case X86::BI__builtin_ia32_storeupd128_mask:
- case X86::BI__builtin_ia32_storeups128_mask:
- case X86::BI__builtin_ia32_storedqudi256_mask:
- case X86::BI__builtin_ia32_storedqusi256_mask:
- case X86::BI__builtin_ia32_storedquhi256_mask:
- case X86::BI__builtin_ia32_storedquqi256_mask:
- case X86::BI__builtin_ia32_storeupd256_mask:
- case X86::BI__builtin_ia32_storeups256_mask:
- case X86::BI__builtin_ia32_storedqudi512_mask:
- case X86::BI__builtin_ia32_storedqusi512_mask:
- case X86::BI__builtin_ia32_storedquhi512_mask:
- case X86::BI__builtin_ia32_storedquqi512_mask:
- case X86::BI__builtin_ia32_storeupd512_mask:
- case X86::BI__builtin_ia32_storeups512_mask:
- case X86::BI__builtin_ia32_storesbf16128_mask:
- case X86::BI__builtin_ia32_storesh128_mask:
- case X86::BI__builtin_ia32_storess128_mask:
- case X86::BI__builtin_ia32_storesd128_mask:
- case X86::BI__builtin_ia32_cvtmask2b128:
- case X86::BI__builtin_ia32_cvtmask2b256:
- case X86::BI__builtin_ia32_cvtmask2b512:
- case X86::BI__builtin_ia32_cvtmask2w128:
- case X86::BI__builtin_ia32_cvtmask2w256:
- case X86::BI__builtin_ia32_cvtmask2w512:
- case X86::BI__builtin_ia32_cvtmask2d128:
- case X86::BI__builtin_ia32_cvtmask2d256:
- case X86::BI__builtin_ia32_cvtmask2d512:
- case X86::BI__builtin_ia32_cvtmask2q128:
- case X86::BI__builtin_ia32_cvtmask2q256:
- case X86::BI__builtin_ia32_cvtmask2q512:
- case X86::BI__builtin_ia32_cvtb2mask128:
- case X86::BI__builtin_ia32_cvtb2mask256:
- case X86::BI__builtin_ia32_cvtb2mask512:
- case X86::BI__builtin_ia32_cvtw2mask128:
- case X86::BI__builtin_ia32_cvtw2mask256:
- case X86::BI__builtin_ia32_cvtw2mask512:
- case X86::BI__builtin_ia32_cvtd2mask128:
- case X86::BI__builtin_ia32_cvtd2mask256:
- case X86::BI__builtin_ia32_cvtd2mask512:
- case X86::BI__builtin_ia32_cvtq2mask128:
- case X86::BI__builtin_ia32_cvtq2mask256:
- case X86::BI__builtin_ia32_cvtq2mask512:
- case X86::BI__builtin_ia32_cvtdq2ps512_mask:
- case X86::BI__builtin_ia32_cvtqq2ps512_mask:
- case X86::BI__builtin_ia32_cvtqq2pd512_mask:
- case X86::BI__builtin_ia32_vcvtw2ph512_mask:
- case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
- case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
- case X86::BI__builtin_ia32_cvtudq2ps512_mask:
- case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
- case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
- case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
- case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
- case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
- case X86::BI__builtin_ia32_vfmaddsh3_mask:
- case X86::BI__builtin_ia32_vfmaddss3_mask:
- case X86::BI__builtin_ia32_vfmaddsd3_mask:
- case X86::BI__builtin_ia32_vfmaddsh3_maskz:
- case X86::BI__builtin_ia32_vfmaddss3_maskz:
- case X86::BI__builtin_ia32_vfmaddsd3_maskz:
- case X86::BI__builtin_ia32_vfmaddsh3_mask3:
- case X86::BI__builtin_ia32_vfmaddss3_mask3:
- case X86::BI__builtin_ia32_vfmaddsd3_mask3:
- case X86::BI__builtin_ia32_vfmsubsh3_mask3:
- case X86::BI__builtin_ia32_vfmsubss3_mask3:
- case X86::BI__builtin_ia32_vfmsubsd3_mask3:
- case X86::BI__builtin_ia32_vfmaddph512_mask:
- case X86::BI__builtin_ia32_vfmaddph512_maskz:
- case X86::BI__builtin_ia32_vfmaddph512_mask3:
- case X86::BI__builtin_ia32_vfmaddps512_mask:
- case X86::BI__builtin_ia32_vfmaddps512_maskz:
- case X86::BI__builtin_ia32_vfmaddps512_mask3:
- case X86::BI__builtin_ia32_vfmsubps512_mask3:
- case X86::BI__builtin_ia32_vfmaddpd512_mask:
- case X86::BI__builtin_ia32_vfmaddpd512_maskz:
- case X86::BI__builtin_ia32_vfmaddpd512_mask3:
- case X86::BI__builtin_ia32_vfmsubpd512_mask3:
- case X86::BI__builtin_ia32_vfmsubph512_mask3:
- case X86::BI__builtin_ia32_vfmaddsubph512_mask:
- case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
- case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
- case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
- case X86::BI__builtin_ia32_vfmaddsubps512_mask:
- case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
- case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
- case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
- case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
- case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
- case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
- case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
- case X86::BI__builtin_ia32_movdqa32store128_mask:
- case X86::BI__builtin_ia32_movdqa64store128_mask:
- case X86::BI__builtin_ia32_storeaps128_mask:
- case X86::BI__builtin_ia32_storeapd128_mask:
- case X86::BI__builtin_ia32_movdqa32store256_mask:
- case X86::BI__builtin_ia32_movdqa64store256_mask:
- case X86::BI__builtin_ia32_storeaps256_mask:
- case X86::BI__builtin_ia32_storeapd256_mask:
- case X86::BI__builtin_ia32_movdqa32store512_mask:
- case X86::BI__builtin_ia32_movdqa64store512_mask:
- case X86::BI__builtin_ia32_storeaps512_mask:
- case X86::BI__builtin_ia32_storeapd512_mask:
- case X86::BI__builtin_ia32_loadups128_mask:
- case X86::BI__builtin_ia32_loadups256_mask:
- case X86::BI__builtin_ia32_loadups512_mask:
- case X86::BI__builtin_ia32_loadupd128_mask:
- case X86::BI__builtin_ia32_loadupd256_mask:
- case X86::BI__builtin_ia32_loadupd512_mask:
- case X86::BI__builtin_ia32_loaddquqi128_mask:
- case X86::BI__builtin_ia32_loaddquqi256_mask:
- case X86::BI__builtin_ia32_loaddquqi512_mask:
- case X86::BI__builtin_ia32_loaddquhi128_mask:
- case X86::BI__builtin_ia32_loaddquhi256_mask:
- case X86::BI__builtin_ia32_loaddquhi512_mask:
- case X86::BI__builtin_ia32_loaddqusi128_mask:
- case X86::BI__builtin_ia32_loaddqusi256_mask:
- case X86::BI__builtin_ia32_loaddqusi512_mask:
- case X86::BI__builtin_ia32_loaddqudi128_mask:
- case X86::BI__builtin_ia32_loaddqudi256_mask:
- case X86::BI__builtin_ia32_loaddqudi512_mask:
- case X86::BI__builtin_ia32_loadsbf16128_mask:
- case X86::BI__builtin_ia32_loadsh128_mask:
- case X86::BI__builtin_ia32_loadss128_mask:
- case X86::BI__builtin_ia32_loadsd128_mask:
- case X86::BI__builtin_ia32_loadaps128_mask:
- case X86::BI__builtin_ia32_loadaps256_mask:
- case X86::BI__builtin_ia32_loadaps512_mask:
- case X86::BI__builtin_ia32_loadapd128_mask:
- case X86::BI__builtin_ia32_loadapd256_mask:
- case X86::BI__builtin_ia32_loadapd512_mask:
- case X86::BI__builtin_ia32_movdqa32load128_mask:
- case X86::BI__builtin_ia32_movdqa32load256_mask:
- case X86::BI__builtin_ia32_movdqa32load512_mask:
- case X86::BI__builtin_ia32_movdqa64load128_mask:
- case X86::BI__builtin_ia32_movdqa64load256_mask:
- case X86::BI__builtin_ia32_movdqa64load512_mask:
- case X86::BI__builtin_ia32_expandloaddf128_mask:
- case X86::BI__builtin_ia32_expandloaddf256_mask:
- case X86::BI__builtin_ia32_expandloaddf512_mask:
- case X86::BI__builtin_ia32_expandloadsf128_mask:
- case X86::BI__builtin_ia32_expandloadsf256_mask:
- case X86::BI__builtin_ia32_expandloadsf512_mask:
- case X86::BI__builtin_ia32_expandloaddi128_mask:
- case X86::BI__builtin_ia32_expandloaddi256_mask:
- case X86::BI__builtin_ia32_expandloaddi512_mask:
- case X86::BI__builtin_ia32_expandloadsi128_mask:
- case X86::BI__builtin_ia32_expandloadsi256_mask:
- case X86::BI__builtin_ia32_expandloadsi512_mask:
- case X86::BI__builtin_ia32_expandloadhi128_mask:
- case X86::BI__builtin_ia32_expandloadhi256_mask:
- case X86::BI__builtin_ia32_expandloadhi512_mask:
- case X86::BI__builtin_ia32_expandloadqi128_mask:
- case X86::BI__builtin_ia32_expandloadqi256_mask:
- case X86::BI__builtin_ia32_expandloadqi512_mask:
- case X86::BI__builtin_ia32_compressstoredf128_mask:
- case X86::BI__builtin_ia32_compressstoredf256_mask:
- case X86::BI__builtin_ia32_compressstoredf512_mask:
- case X86::BI__builtin_ia32_compressstoresf128_mask:
- case X86::BI__builtin_ia32_compressstoresf256_mask:
- case X86::BI__builtin_ia32_compressstoresf512_mask:
- case X86::BI__builtin_ia32_compressstoredi128_mask:
- case X86::BI__builtin_ia32_compressstoredi256_mask:
- case X86::BI__builtin_ia32_compressstoredi512_mask:
- case X86::BI__builtin_ia32_compressstoresi128_mask:
- case X86::BI__builtin_ia32_compressstoresi256_mask:
- case X86::BI__builtin_ia32_compressstoresi512_mask:
- case X86::BI__builtin_ia32_compressstorehi128_mask:
- case X86::BI__builtin_ia32_compressstorehi256_mask:
- case X86::BI__builtin_ia32_compressstorehi512_mask:
- case X86::BI__builtin_ia32_compressstoreqi128_mask:
- case X86::BI__builtin_ia32_compressstoreqi256_mask:
- case X86::BI__builtin_ia32_compressstoreqi512_mask:
- case X86::BI__builtin_ia32_expanddf128_mask:
- case X86::BI__builtin_ia32_expanddf256_mask:
- case X86::BI__builtin_ia32_expanddf512_mask:
- case X86::BI__builtin_ia32_expandsf128_mask:
- case X86::BI__builtin_ia32_expandsf256_mask:
- case X86::BI__builtin_ia32_expandsf512_mask:
- case X86::BI__builtin_ia32_expanddi128_mask:
- case X86::BI__builtin_ia32_expanddi256_mask:
- case X86::BI__builtin_ia32_expanddi512_mask:
- case X86::BI__builtin_ia32_expandsi128_mask:
- case X86::BI__builtin_ia32_expandsi256_mask:
- case X86::BI__builtin_ia32_expandsi512_mask:
- case X86::BI__builtin_ia32_expandhi128_mask:
- case X86::BI__builtin_ia32_expandhi256_mask:
- case X86::BI__builtin_ia32_expandhi512_mask:
- case X86::BI__builtin_ia32_expandqi128_mask:
- case X86::BI__builtin_ia32_expandqi256_mask:
- case X86::BI__builtin_ia32_expandqi512_mask:
- case X86::BI__builtin_ia32_compressdf128_mask:
- case X86::BI__builtin_ia32_compressdf256_mask:
- case X86::BI__builtin_ia32_compressdf512_mask:
- case X86::BI__builtin_ia32_compresssf128_mask:
- case X86::BI__builtin_ia32_compresssf256_mask:
- case X86::BI__builtin_ia32_compresssf512_mask:
- case X86::BI__builtin_ia32_compressdi128_mask:
- case X86::BI__builtin_ia32_compressdi256_mask:
- case X86::BI__builtin_ia32_compressdi512_mask:
- case X86::BI__builtin_ia32_compresssi128_mask:
- case X86::BI__builtin_ia32_compresssi256_mask:
- case X86::BI__builtin_ia32_compresssi512_mask:
- case X86::BI__builtin_ia32_compresshi128_mask:
- case X86::BI__builtin_ia32_compresshi256_mask:
- case X86::BI__builtin_ia32_compresshi512_mask:
- case X86::BI__builtin_ia32_compressqi128_mask:
- case X86::BI__builtin_ia32_compressqi256_mask:
- case X86::BI__builtin_ia32_compressqi512_mask:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- case X86::BI__builtin_ia32_gather3div2df:
- case X86::BI__builtin_ia32_gather3div2di:
- case X86::BI__builtin_ia32_gather3div4df:
- case X86::BI__builtin_ia32_gather3div4di:
- case X86::BI__builtin_ia32_gather3div4sf:
- case X86::BI__builtin_ia32_gather3div4si:
- case X86::BI__builtin_ia32_gather3div8sf:
- case X86::BI__builtin_ia32_gather3div8si:
- case X86::BI__builtin_ia32_gather3siv2df:
- case X86::BI__builtin_ia32_gather3siv2di:
- case X86::BI__builtin_ia32_gather3siv4df:
- case X86::BI__builtin_ia32_gather3siv4di:
- case X86::BI__builtin_ia32_gather3siv4sf:
- case X86::BI__builtin_ia32_gather3siv4si:
- case X86::BI__builtin_ia32_gather3siv8sf:
- case X86::BI__builtin_ia32_gather3siv8si:
- case X86::BI__builtin_ia32_gathersiv8df:
- case X86::BI__builtin_ia32_gathersiv16sf:
- case X86::BI__builtin_ia32_gatherdiv8df:
- case X86::BI__builtin_ia32_gatherdiv16sf:
- case X86::BI__builtin_ia32_gathersiv8di:
- case X86::BI__builtin_ia32_gathersiv16si:
- case X86::BI__builtin_ia32_gatherdiv8di:
- case X86::BI__builtin_ia32_gatherdiv16si: {
- StringRef intrinsicName;
- switch (builtinID) {
- default:
- llvm_unreachable("Unexpected builtin");
- case X86::BI__builtin_ia32_gather3div2df:
- intrinsicName = "x86.avx512.mask.gather3div2.df";
- break;
- case X86::BI__builtin_ia32_gather3div2di:
- intrinsicName = "x86.avx512.mask.gather3div2.di";
- break;
- case X86::BI__builtin_ia32_gather3div4df:
- intrinsicName = "x86.avx512.mask.gather3div4.df";
- break;
- case X86::BI__builtin_ia32_gather3div4di:
- intrinsicName = "x86.avx512.mask.gather3div4.di";
- break;
- case X86::BI__builtin_ia32_gather3div4sf:
- intrinsicName = "x86.avx512.mask.gather3div4.sf";
- break;
- case X86::BI__builtin_ia32_gather3div4si:
- intrinsicName = "x86.avx512.mask.gather3div4.si";
- break;
- case X86::BI__builtin_ia32_gather3div8sf:
- intrinsicName = "x86.avx512.mask.gather3div8.sf";
- break;
- case X86::BI__builtin_ia32_gather3div8si:
- intrinsicName = "x86.avx512.mask.gather3div8.si";
- break;
- case X86::BI__builtin_ia32_gather3siv2df:
- intrinsicName = "x86.avx512.mask.gather3siv2.df";
- break;
- case X86::BI__builtin_ia32_gather3siv2di:
- intrinsicName = "x86.avx512.mask.gather3siv2.di";
- break;
- case X86::BI__builtin_ia32_gather3siv4df:
- intrinsicName = "x86.avx512.mask.gather3siv4.df";
- break;
- case X86::BI__builtin_ia32_gather3siv4di:
- intrinsicName = "x86.avx512.mask.gather3siv4.di";
- break;
- case X86::BI__builtin_ia32_gather3siv4sf:
- intrinsicName = "x86.avx512.mask.gather3siv4.sf";
- break;
- case X86::BI__builtin_ia32_gather3siv4si:
- intrinsicName = "x86.avx512.mask.gather3siv4.si";
- break;
- case X86::BI__builtin_ia32_gather3siv8sf:
- intrinsicName = "x86.avx512.mask.gather3siv8.sf";
- break;
- case X86::BI__builtin_ia32_gather3siv8si:
- intrinsicName = "x86.avx512.mask.gather3siv8.si";
- break;
- case X86::BI__builtin_ia32_gathersiv8df:
- intrinsicName = "x86.avx512.mask.gather.dpd.512";
- break;
- case X86::BI__builtin_ia32_gathersiv16sf:
- intrinsicName = "x86.avx512.mask.gather.dps.512";
- break;
- case X86::BI__builtin_ia32_gatherdiv8df:
- intrinsicName = "x86.avx512.mask.gather.qpd.512";
- break;
- case X86::BI__builtin_ia32_gatherdiv16sf:
- intrinsicName = "x86.avx512.mask.gather.qps.512";
- break;
- case X86::BI__builtin_ia32_gathersiv8di:
- intrinsicName = "x86.avx512.mask.gather.dpq.512";
- break;
- case X86::BI__builtin_ia32_gathersiv16si:
- intrinsicName = "x86.avx512.mask.gather.dpi.512";
- break;
- case X86::BI__builtin_ia32_gatherdiv8di:
- intrinsicName = "x86.avx512.mask.gather.qpq.512";
- break;
- case X86::BI__builtin_ia32_gatherdiv16si:
- intrinsicName = "x86.avx512.mask.gather.qpi.512";
- break;
- }
-
- mlir::Location loc = getLoc(expr->getExprLoc());
- unsigned minElts =
- std::min(cast<cir::VectorType>(ops[0].getType()).getSize(),
- cast<cir::VectorType>(ops[2].getType()).getSize());
- ops[3] = getMaskVecValue(builder, loc, ops[3], minElts);
- return emitIntrinsicCallOp(builder, loc, intrinsicName,
- convertType(expr->getType()), ops);
- }
- case X86::BI__builtin_ia32_scattersiv8df:
- case X86::BI__builtin_ia32_scattersiv16sf:
- case X86::BI__builtin_ia32_scatterdiv8df:
- case X86::BI__builtin_ia32_scatterdiv16sf:
- case X86::BI__builtin_ia32_scattersiv8di:
- case X86::BI__builtin_ia32_scattersiv16si:
- case X86::BI__builtin_ia32_scatterdiv8di:
- case X86::BI__builtin_ia32_scatterdiv16si:
- case X86::BI__builtin_ia32_scatterdiv2df:
- case X86::BI__builtin_ia32_scatterdiv2di:
- case X86::BI__builtin_ia32_scatterdiv4df:
- case X86::BI__builtin_ia32_scatterdiv4di:
- case X86::BI__builtin_ia32_scatterdiv4sf:
- case X86::BI__builtin_ia32_scatterdiv4si:
- case X86::BI__builtin_ia32_scatterdiv8sf:
- case X86::BI__builtin_ia32_scatterdiv8si:
- case X86::BI__builtin_ia32_scattersiv2df:
- case X86::BI__builtin_ia32_scattersiv2di:
- case X86::BI__builtin_ia32_scattersiv4df:
- case X86::BI__builtin_ia32_scattersiv4di:
- case X86::BI__builtin_ia32_scattersiv4sf:
- case X86::BI__builtin_ia32_scattersiv4si:
- case X86::BI__builtin_ia32_scattersiv8sf:
- case X86::BI__builtin_ia32_scattersiv8si: {
- llvm::StringRef intrinsicName;
- switch (builtinID) {
- default:
- llvm_unreachable("Unexpected builtin");
- case X86::BI__builtin_ia32_scattersiv8df:
- intrinsicName = "x86.avx512.mask.scatter.dpd.512";
- break;
- case X86::BI__builtin_ia32_scattersiv16sf:
- intrinsicName = "x86.avx512.mask.scatter.dps.512";
- break;
- case X86::BI__builtin_ia32_scatterdiv8df:
- intrinsicName = "x86.avx512.mask.scatter.qpd.512";
- break;
- case X86::BI__builtin_ia32_scatterdiv16sf:
- intrinsicName = "x86.avx512.mask.scatter.qps.512";
- break;
- case X86::BI__builtin_ia32_scattersiv8di:
- intrinsicName = "x86.avx512.mask.scatter.dpq.512";
- break;
- case X86::BI__builtin_ia32_scattersiv16si:
- intrinsicName = "x86.avx512.mask.scatter.dpi.512";
- break;
- case X86::BI__builtin_ia32_scatterdiv8di:
- intrinsicName = "x86.avx512.mask.scatter.qpq.512";
- break;
- case X86::BI__builtin_ia32_scatterdiv16si:
- intrinsicName = "x86.avx512.mask.scatter.qpi.512";
- break;
- case X86::BI__builtin_ia32_scatterdiv2df:
- intrinsicName = "x86.avx512.mask.scatterdiv2.df";
- break;
- case X86::BI__builtin_ia32_scatterdiv2di:
- intrinsicName = "x86.avx512.mask.scatterdiv2.di";
- break;
- case X86::BI__builtin_ia32_scatterdiv4df:
- intrinsicName = "x86.avx512.mask.scatterdiv4.df";
- break;
- case X86::BI__builtin_ia32_scatterdiv4di:
- intrinsicName = "x86.avx512.mask.scatterdiv4.di";
- break;
- case X86::BI__builtin_ia32_scatterdiv4sf:
- intrinsicName = "x86.avx512.mask.scatterdiv4.sf";
- break;
- case X86::BI__builtin_ia32_scatterdiv4si:
- intrinsicName = "x86.avx512.mask.scatterdiv4.si";
- break;
- case X86::BI__builtin_ia32_scatterdiv8sf:
- intrinsicName = "x86.avx512.mask.scatterdiv8.sf";
- break;
- case X86::BI__builtin_ia32_scatterdiv8si:
- intrinsicName = "x86.avx512.mask.scatterdiv8.si";
- break;
- case X86::BI__builtin_ia32_scattersiv2df:
- intrinsicName = "x86.avx512.mask.scattersiv2.df";
- break;
- case X86::BI__builtin_ia32_scattersiv2di:
- intrinsicName = "x86.avx512.mask.scattersiv2.di";
- break;
- case X86::BI__builtin_ia32_scattersiv4df:
- intrinsicName = "x86.avx512.mask.scattersiv4.df";
- break;
- case X86::BI__builtin_ia32_scattersiv4di:
- intrinsicName = "x86.avx512.mask.scattersiv4.di";
- break;
- case X86::BI__builtin_ia32_scattersiv4sf:
- intrinsicName = "x86.avx512.mask.scattersiv4.sf";
- break;
- case X86::BI__builtin_ia32_scattersiv4si:
- intrinsicName = "x86.avx512.mask.scattersiv4.si";
- break;
- case X86::BI__builtin_ia32_scattersiv8sf:
- intrinsicName = "x86.avx512.mask.scattersiv8.sf";
- break;
- case X86::BI__builtin_ia32_scattersiv8si:
- intrinsicName = "x86.avx512.mask.scattersiv8.si";
- break;
- }
-
- mlir::Location loc = getLoc(expr->getExprLoc());
- unsigned minElts =
- std::min(cast<cir::VectorType>(ops[2].getType()).getSize(),
- cast<cir::VectorType>(ops[3].getType()).getSize());
- ops[1] = getMaskVecValue(builder, loc, ops[1], minElts);
-
- return emitIntrinsicCallOp(builder, loc, intrinsicName,
- convertType(expr->getType()), ops);
- }
- case X86::BI__builtin_ia32_vextractf128_pd256:
- case X86::BI__builtin_ia32_vextractf128_ps256:
- case X86::BI__builtin_ia32_vextractf128_si256:
- case X86::BI__builtin_ia32_extract128i256:
- case X86::BI__builtin_ia32_extractf64x4_mask:
- case X86::BI__builtin_ia32_extractf32x4_mask:
- case X86::BI__builtin_ia32_extracti64x4_mask:
- case X86::BI__builtin_ia32_extracti32x4_mask:
- case X86::BI__builtin_ia32_extractf32x8_mask:
- case X86::BI__builtin_ia32_extracti32x8_mask:
- case X86::BI__builtin_ia32_extractf32x4_256_mask:
- case X86::BI__builtin_ia32_extracti32x4_256_mask:
- case X86::BI__builtin_ia32_extractf64x2_256_mask:
- case X86::BI__builtin_ia32_extracti64x2_256_mask:
- case X86::BI__builtin_ia32_extractf64x2_512_mask:
- case X86::BI__builtin_ia32_extracti64x2_512_mask:
- case X86::BI__builtin_ia32_vinsertf128_pd256:
- case X86::BI__builtin_ia32_vinsertf128_ps256:
- case X86::BI__builtin_ia32_vinsertf128_si256:
- case X86::BI__builtin_ia32_insert128i256:
- case X86::BI__builtin_ia32_insertf64x4:
- case X86::BI__builtin_ia32_insertf32x4:
- case X86::BI__builtin_ia32_inserti64x4:
- case X86::BI__builtin_ia32_inserti32x4:
- case X86::BI__builtin_ia32_insertf32x8:
- case X86::BI__builtin_ia32_inserti32x8:
- case X86::BI__builtin_ia32_insertf32x4_256:
- case X86::BI__builtin_ia32_inserti32x4_256:
- case X86::BI__builtin_ia32_insertf64x2_256:
- case X86::BI__builtin_ia32_inserti64x2_256:
- case X86::BI__builtin_ia32_insertf64x2_512:
- case X86::BI__builtin_ia32_inserti64x2_512:
- case X86::BI__builtin_ia32_pmovqd512_mask:
- case X86::BI__builtin_ia32_pmovwb512_mask:
- case X86::BI__builtin_ia32_pblendw128:
- case X86::BI__builtin_ia32_blendpd:
- case X86::BI__builtin_ia32_blendps:
- case X86::BI__builtin_ia32_blendpd256:
- case X86::BI__builtin_ia32_blendps256:
- case X86::BI__builtin_ia32_pblendw256:
- case X86::BI__builtin_ia32_pblendd128:
- case X86::BI__builtin_ia32_pblendd256:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- case X86::BI__builtin_ia32_pshuflw:
- case X86::BI__builtin_ia32_pshuflw256:
- case X86::BI__builtin_ia32_pshuflw512:
- return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
- true);
- case X86::BI__builtin_ia32_pshufhw:
- case X86::BI__builtin_ia32_pshufhw256:
- case X86::BI__builtin_ia32_pshufhw512:
- return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
- false);
- case X86::BI__builtin_ia32_pshufd:
- case X86::BI__builtin_ia32_pshufd256:
- case X86::BI__builtin_ia32_pshufd512:
- case X86::BI__builtin_ia32_vpermilpd:
- case X86::BI__builtin_ia32_vpermilps:
- case X86::BI__builtin_ia32_vpermilpd256:
- case X86::BI__builtin_ia32_vpermilps256:
- case X86::BI__builtin_ia32_vpermilpd512:
- case X86::BI__builtin_ia32_vpermilps512: {
- const uint32_t imm = getSExtIntValueFromConstOp(ops[1]);
-
- llvm::SmallVector<int64_t, 16> mask(16);
- computeFullLaneShuffleMask(*this, ops[0], imm, false, mask);
-
- return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], mask);
- }
- case X86::BI__builtin_ia32_shufpd:
- case X86::BI__builtin_ia32_shufpd256:
- case X86::BI__builtin_ia32_shufpd512:
- case X86::BI__builtin_ia32_shufps:
- case X86::BI__builtin_ia32_shufps256:
- case X86::BI__builtin_ia32_shufps512: {
- const uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
-
- llvm::SmallVector<int64_t, 16> mask(16);
- computeFullLaneShuffleMask(*this, ops[0], imm, true, mask);
-
- return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
- mask);
- }
- case X86::BI__builtin_ia32_permdi256:
- case X86::BI__builtin_ia32_permdf256:
- case X86::BI__builtin_ia32_permdi512:
- case X86::BI__builtin_ia32_permdf512:
- case X86::BI__builtin_ia32_palignr128:
- case X86::BI__builtin_ia32_palignr256:
- case X86::BI__builtin_ia32_palignr512:
- case X86::BI__builtin_ia32_alignd128:
- case X86::BI__builtin_ia32_alignd256:
- case X86::BI__builtin_ia32_alignd512:
- case X86::BI__builtin_ia32_alignq128:
- case X86::BI__builtin_ia32_alignq256:
- case X86::BI__builtin_ia32_alignq512:
- case X86::BI__builtin_ia32_shuf_f32x4_256:
- case X86::BI__builtin_ia32_shuf_f64x2_256:
- case X86::BI__builtin_ia32_shuf_i32x4_256:
- case X86::BI__builtin_ia32_shuf_i64x2_256:
- case X86::BI__builtin_ia32_shuf_f32x4:
- case X86::BI__builtin_ia32_shuf_f64x2:
- case X86::BI__builtin_ia32_shuf_i32x4:
- case X86::BI__builtin_ia32_shuf_i64x2:
- case X86::BI__builtin_ia32_vperm2f128_pd256:
- case X86::BI__builtin_ia32_vperm2f128_ps256:
- case X86::BI__builtin_ia32_vperm2f128_si256:
- case X86::BI__builtin_ia32_permti256:
- case X86::BI__builtin_ia32_pslldqi128_byteshift:
- case X86::BI__builtin_ia32_pslldqi256_byteshift:
- case X86::BI__builtin_ia32_pslldqi512_byteshift:
- case X86::BI__builtin_ia32_psrldqi128_byteshift:
- case X86::BI__builtin_ia32_psrldqi256_byteshift:
- case X86::BI__builtin_ia32_psrldqi512_byteshift:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- case X86::BI__builtin_ia32_kshiftliqi:
- case X86::BI__builtin_ia32_kshiftlihi:
- case X86::BI__builtin_ia32_kshiftlisi:
- case X86::BI__builtin_ia32_kshiftlidi: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- unsigned shiftVal =
- ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
- 0xff;
- unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
-
- if (shiftVal >= numElems)
- return builder.getNullValue(ops[0].getType(), loc);
-
- mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
-
- SmallVector<mlir::Attribute, 64> indices;
- mlir::Type i32Ty = builder.getSInt32Ty();
- for (auto i : llvm::seq<unsigned>(0, numElems))
- indices.push_back(cir::IntAttr::get(i32Ty, numElems + i - shiftVal));
-
- mlir::Value zero = builder.getNullValue(in.getType(), loc);
- mlir::Value sv = builder.createVecShuffle(loc, zero, in, indices);
- return builder.createBitcast(sv, ops[0].getType());
- }
- case X86::BI__builtin_ia32_kshiftriqi:
- case X86::BI__builtin_ia32_kshiftrihi:
- case X86::BI__builtin_ia32_kshiftrisi:
- case X86::BI__builtin_ia32_kshiftridi: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- unsigned shiftVal =
- ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
- 0xff;
- unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
-
- if (shiftVal >= numElems)
- return builder.getNullValue(ops[0].getType(), loc);
-
- mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
-
- SmallVector<mlir::Attribute, 64> indices;
- mlir::Type i32Ty = builder.getSInt32Ty();
- for (auto i : llvm::seq<unsigned>(0, numElems))
- indices.push_back(cir::IntAttr::get(i32Ty, i + shiftVal));
-
- mlir::Value zero = builder.getNullValue(in.getType(), loc);
- mlir::Value sv = builder.createVecShuffle(loc, in, zero, indices);
- return builder.createBitcast(sv, ops[0].getType());
- }
- case X86::BI__builtin_ia32_vprotbi:
- case X86::BI__builtin_ia32_vprotwi:
- case X86::BI__builtin_ia32_vprotdi:
- case X86::BI__builtin_ia32_vprotqi:
- case X86::BI__builtin_ia32_prold128:
- case X86::BI__builtin_ia32_prold256:
- case X86::BI__builtin_ia32_prold512:
- case X86::BI__builtin_ia32_prolq128:
- case X86::BI__builtin_ia32_prolq256:
- case X86::BI__builtin_ia32_prolq512:
- return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
- ops[0], ops[1], false);
- case X86::BI__builtin_ia32_prord128:
- case X86::BI__builtin_ia32_prord256:
- case X86::BI__builtin_ia32_prord512:
- case X86::BI__builtin_ia32_prorq128:
- case X86::BI__builtin_ia32_prorq256:
- case X86::BI__builtin_ia32_prorq512:
- return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
- ops[0], ops[1], true);
- case X86::BI__builtin_ia32_selectb_128:
- case X86::BI__builtin_ia32_selectb_256:
- case X86::BI__builtin_ia32_selectb_512:
- case X86::BI__builtin_ia32_selectw_128:
- case X86::BI__builtin_ia32_selectw_256:
- case X86::BI__builtin_ia32_selectw_512:
- case X86::BI__builtin_ia32_selectd_128:
- case X86::BI__builtin_ia32_selectd_256:
- case X86::BI__builtin_ia32_selectd_512:
- case X86::BI__builtin_ia32_selectq_128:
- case X86::BI__builtin_ia32_selectq_256:
- case X86::BI__builtin_ia32_selectq_512:
- case X86::BI__builtin_ia32_selectph_128:
- case X86::BI__builtin_ia32_selectph_256:
- case X86::BI__builtin_ia32_selectph_512:
- case X86::BI__builtin_ia32_selectpbf_128:
- case X86::BI__builtin_ia32_selectpbf_256:
- case X86::BI__builtin_ia32_selectpbf_512:
- case X86::BI__builtin_ia32_selectps_128:
- case X86::BI__builtin_ia32_selectps_256:
- case X86::BI__builtin_ia32_selectps_512:
- case X86::BI__builtin_ia32_selectpd_128:
- case X86::BI__builtin_ia32_selectpd_256:
- case X86::BI__builtin_ia32_selectpd_512:
- case X86::BI__builtin_ia32_selectsh_128:
- case X86::BI__builtin_ia32_selectsbf_128:
- case X86::BI__builtin_ia32_selectss_128:
- case X86::BI__builtin_ia32_selectsd_128:
- case X86::BI__builtin_ia32_cmpb128_mask:
- case X86::BI__builtin_ia32_cmpb256_mask:
- case X86::BI__builtin_ia32_cmpb512_mask:
- case X86::BI__builtin_ia32_cmpw128_mask:
- case X86::BI__builtin_ia32_cmpw256_mask:
- case X86::BI__builtin_ia32_cmpw512_mask:
- case X86::BI__builtin_ia32_cmpd128_mask:
- case X86::BI__builtin_ia32_cmpd256_mask:
- case X86::BI__builtin_ia32_cmpd512_mask:
- case X86::BI__builtin_ia32_cmpq128_mask:
- case X86::BI__builtin_ia32_cmpq256_mask:
- case X86::BI__builtin_ia32_cmpq512_mask:
- case X86::BI__builtin_ia32_ucmpb128_mask:
- case X86::BI__builtin_ia32_ucmpb256_mask:
- case X86::BI__builtin_ia32_ucmpb512_mask:
- case X86::BI__builtin_ia32_ucmpw128_mask:
- case X86::BI__builtin_ia32_ucmpw256_mask:
- case X86::BI__builtin_ia32_ucmpw512_mask:
- case X86::BI__builtin_ia32_ucmpd128_mask:
- case X86::BI__builtin_ia32_ucmpd256_mask:
- case X86::BI__builtin_ia32_ucmpd512_mask:
- case X86::BI__builtin_ia32_ucmpq128_mask:
- case X86::BI__builtin_ia32_ucmpq256_mask:
- case X86::BI__builtin_ia32_ucmpq512_mask:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- case X86::BI__builtin_ia32_vpcomb:
- case X86::BI__builtin_ia32_vpcomw:
- case X86::BI__builtin_ia32_vpcomd:
- case X86::BI__builtin_ia32_vpcomq:
- return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
- case X86::BI__builtin_ia32_vpcomub:
- case X86::BI__builtin_ia32_vpcomuw:
- case X86::BI__builtin_ia32_vpcomud:
- case X86::BI__builtin_ia32_vpcomuq:
- return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
- case X86::BI__builtin_ia32_kortestcqi:
- case X86::BI__builtin_ia32_kortestchi:
- case X86::BI__builtin_ia32_kortestcsi:
- case X86::BI__builtin_ia32_kortestcdi: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- cir::IntType ty = cast<cir::IntType>(ops[0].getType());
- mlir::Value allOnesOp =
- builder.getConstAPInt(loc, ty, APInt::getAllOnes(ty.getWidth()));
- mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
- mlir::Value cmp =
- cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp);
- return builder.createCast(cir::CastKind::bool_to_int, cmp,
- cgm.convertType(expr->getType()));
- }
- case X86::BI__builtin_ia32_kortestzqi:
- case X86::BI__builtin_ia32_kortestzhi:
- case X86::BI__builtin_ia32_kortestzsi:
- case X86::BI__builtin_ia32_kortestzdi: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- cir::IntType ty = cast<cir::IntType>(ops[0].getType());
- mlir::Value allZerosOp = builder.getNullValue(ty, loc).getResult();
- mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
- mlir::Value cmp =
- cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp);
- return builder.createCast(cir::CastKind::bool_to_int, cmp,
- cgm.convertType(expr->getType()));
- }
- case X86::BI__builtin_ia32_ktestcqi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestc.b", ops);
- case X86::BI__builtin_ia32_ktestzqi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestz.b", ops);
- case X86::BI__builtin_ia32_ktestchi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestc.w", ops);
- case X86::BI__builtin_ia32_ktestzhi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestz.w", ops);
- case X86::BI__builtin_ia32_ktestcsi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestc.d", ops);
- case X86::BI__builtin_ia32_ktestzsi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestz.d", ops);
- case X86::BI__builtin_ia32_ktestcdi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestc.q", ops);
- case X86::BI__builtin_ia32_ktestzdi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestz.q", ops);
- case X86::BI__builtin_ia32_kaddqi:
- return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kadd.b", ops);
- case X86::BI__builtin_ia32_kaddhi:
- return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kadd.w", ops);
- case X86::BI__builtin_ia32_kaddsi:
- return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kadd.d", ops);
- case X86::BI__builtin_ia32_kadddi:
- return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kadd.q", ops);
- case X86::BI__builtin_ia32_kandqi:
- case X86::BI__builtin_ia32_kandhi:
- case X86::BI__builtin_ia32_kandsi:
- case X86::BI__builtin_ia32_kanddi:
- return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
- cir::BinOpKind::And, ops);
- case X86::BI__builtin_ia32_kandnqi:
- case X86::BI__builtin_ia32_kandnhi:
- case X86::BI__builtin_ia32_kandnsi:
- case X86::BI__builtin_ia32_kandndi:
- return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
- cir::BinOpKind::And, ops, true);
- case X86::BI__builtin_ia32_korqi:
- case X86::BI__builtin_ia32_korhi:
- case X86::BI__builtin_ia32_korsi:
- case X86::BI__builtin_ia32_kordi:
- return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
- cir::BinOpKind::Or, ops);
- case X86::BI__builtin_ia32_kxnorqi:
- case X86::BI__builtin_ia32_kxnorhi:
- case X86::BI__builtin_ia32_kxnorsi:
- case X86::BI__builtin_ia32_kxnordi:
- return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
- cir::BinOpKind::Xor, ops, true);
- case X86::BI__builtin_ia32_kxorqi:
- case X86::BI__builtin_ia32_kxorhi:
- case X86::BI__builtin_ia32_kxorsi:
- case X86::BI__builtin_ia32_kxordi:
- return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
- cir::BinOpKind::Xor, ops);
- case X86::BI__builtin_ia32_knotqi:
- case X86::BI__builtin_ia32_knothi:
- case X86::BI__builtin_ia32_knotsi:
- case X86::BI__builtin_ia32_knotdi: {
- cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
- unsigned numElts = intTy.getWidth();
- mlir::Value resVec =
- getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
- return builder.createBitcast(builder.createNot(resVec), ops[0].getType());
- }
- case X86::BI__builtin_ia32_kmovb:
- case X86::BI__builtin_ia32_kmovw:
- case X86::BI__builtin_ia32_kmovd:
- case X86::BI__builtin_ia32_kmovq: {
- // Bitcast to vXi1 type and then back to integer. This gets the mask
- // register type into the IR, but might be optimized out depending on
- // what's around it.
- cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
- unsigned numElts = intTy.getWidth();
- mlir::Value resVec =
- getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
- return builder.createBitcast(resVec, ops[0].getType());
- }
- case X86::BI__builtin_ia32_sqrtsh_round_mask:
- case X86::BI__builtin_ia32_sqrtsd_round_mask:
- case X86::BI__builtin_ia32_sqrtss_round_mask:
- errorNYI("Unimplemented builtin");
- return {};
- case X86::BI__builtin_ia32_sqrtph512:
- case X86::BI__builtin_ia32_sqrtps512:
- case X86::BI__builtin_ia32_sqrtpd512: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- mlir::Value arg = ops[0];
- return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
- }
- case X86::BI__builtin_ia32_pmuludq128:
- case X86::BI__builtin_ia32_pmuludq256:
- case X86::BI__builtin_ia32_pmuludq512: {
- unsigned opTypePrimitiveSizeInBits =
- cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
- return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ false,
- ops, opTypePrimitiveSizeInBits);
- }
- case X86::BI__builtin_ia32_pmuldq128:
- case X86::BI__builtin_ia32_pmuldq256:
- case X86::BI__builtin_ia32_pmuldq512: {
- unsigned opTypePrimitiveSizeInBits =
- cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
- return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ true,
- ops, opTypePrimitiveSizeInBits);
- }
- case X86::BI__builtin_ia32_pternlogd512_mask:
- case X86::BI__builtin_ia32_pternlogq512_mask:
- case X86::BI__builtin_ia32_pternlogd128_mask:
- case X86::BI__builtin_ia32_pternlogd256_mask:
- case X86::BI__builtin_ia32_pternlogq128_mask:
- case X86::BI__builtin_ia32_pternlogq256_mask:
- case X86::BI__builtin_ia32_pternlogd512_maskz:
- case X86::BI__builtin_ia32_pternlogq512_maskz:
- case X86::BI__builtin_ia32_pternlogd128_maskz:
- case X86::BI__builtin_ia32_pternlogd256_maskz:
- case X86::BI__builtin_ia32_pternlogq128_maskz:
- case X86::BI__builtin_ia32_pternlogq256_maskz:
- case X86::BI__builtin_ia32_vpshldd128:
- case X86::BI__builtin_ia32_vpshldd256:
- case X86::BI__builtin_ia32_vpshldd512:
- case X86::BI__builtin_ia32_vpshldq128:
- case X86::BI__builtin_ia32_vpshldq256:
- case X86::BI__builtin_ia32_vpshldq512:
- case X86::BI__builtin_ia32_vpshldw128:
- case X86::BI__builtin_ia32_vpshldw256:
- case X86::BI__builtin_ia32_vpshldw512:
- case X86::BI__builtin_ia32_vpshrdd128:
- case X86::BI__builtin_ia32_vpshrdd256:
- case X86::BI__builtin_ia32_vpshrdd512:
- case X86::BI__builtin_ia32_vpshrdq128:
- case X86::BI__builtin_ia32_vpshrdq256:
- case X86::BI__builtin_ia32_vpshrdq512:
- case X86::BI__builtin_ia32_vpshrdw128:
- case X86::BI__builtin_ia32_vpshrdw256:
- case X86::BI__builtin_ia32_vpshrdw512:
- case X86::BI__builtin_ia32_reduce_fadd_pd512:
- case X86::BI__builtin_ia32_reduce_fadd_ps512:
- case X86::BI__builtin_ia32_reduce_fadd_ph512:
- case X86::BI__builtin_ia32_reduce_fadd_ph256:
- case X86::BI__builtin_ia32_reduce_fadd_ph128:
- case X86::BI__builtin_ia32_reduce_fmul_pd512:
- case X86::BI__builtin_ia32_reduce_fmul_ps512:
- case X86::BI__builtin_ia32_reduce_fmul_ph512:
- case X86::BI__builtin_ia32_reduce_fmul_ph256:
- case X86::BI__builtin_ia32_reduce_fmul_ph128:
- case X86::BI__builtin_ia32_reduce_fmax_pd512:
- case X86::BI__builtin_ia32_reduce_fmax_ps512:
- case X86::BI__builtin_ia32_reduce_fmax_ph512:
- case X86::BI__builtin_ia32_reduce_fmax_ph256:
- case X86::BI__builtin_ia32_reduce_fmax_ph128:
- case X86::BI__builtin_ia32_reduce_fmin_pd512:
- case X86::BI__builtin_ia32_reduce_fmin_ps512:
- case X86::BI__builtin_ia32_reduce_fmin_ph512:
- case X86::BI__builtin_ia32_reduce_fmin_ph256:
- case X86::BI__builtin_ia32_reduce_fmin_ph128:
- case X86::BI__builtin_ia32_rdrand16_step:
- case X86::BI__builtin_ia32_rdrand32_step:
- case X86::BI__builtin_ia32_rdrand64_step:
- case X86::BI__builtin_ia32_rdseed16_step:
- case X86::BI__builtin_ia32_rdseed32_step:
- case X86::BI__builtin_ia32_rdseed64_step:
- case X86::BI__builtin_ia32_addcarryx_u32:
- case X86::BI__builtin_ia32_addcarryx_u64:
- case X86::BI__builtin_ia32_subborrow_u32:
- case X86::BI__builtin_ia32_subborrow_u64:
- case X86::BI__builtin_ia32_fpclassps128_mask:
- case X86::BI__builtin_ia32_fpclassps256_mask:
- case X86::BI__builtin_ia32_fpclassps512_mask:
- case X86::BI__builtin_ia32_vfpclassbf16128_mask:
- case X86::BI__builtin_ia32_vfpclassbf16256_mask:
- case X86::BI__builtin_ia32_vfpclassbf16512_mask:
- case X86::BI__builtin_ia32_fpclassph128_mask:
- case X86::BI__builtin_ia32_fpclassph256_mask:
- case X86::BI__builtin_ia32_fpclassph512_mask:
- case X86::BI__builtin_ia32_fpclasspd128_mask:
- case X86::BI__builtin_ia32_fpclasspd256_mask:
- case X86::BI__builtin_ia32_fpclasspd512_mask:
- case X86::BI__builtin_ia32_vp2intersect_q_512:
- case X86::BI__builtin_ia32_vp2intersect_q_256:
- case X86::BI__builtin_ia32_vp2intersect_q_128:
- case X86::BI__builtin_ia32_vp2intersect_d_512:
- case X86::BI__builtin_ia32_vp2intersect_d_256:
- case X86::BI__builtin_ia32_vp2intersect_d_128:
- case X86::BI__builtin_ia32_vpmultishiftqb128:
- case X86::BI__builtin_ia32_vpmultishiftqb256:
- case X86::BI__builtin_ia32_vpmultishiftqb512:
- case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
- case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
- case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpeqpd:
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpltpd:
- case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpunordpd:
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpneqpd:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnltpd:
- return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
- cir::CmpOpKind::lt, /*shouldInvert=*/true);
- case X86::BI__builtin_ia32_cmpnleps:
- case X86::BI__builtin_ia32_cmpnlepd:
- return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
- cir::CmpOpKind::le, /*shouldInvert=*/true);
- case X86::BI__builtin_ia32_cmpordps:
- case X86::BI__builtin_ia32_cmpordpd:
- case X86::BI__builtin_ia32_cmpph128_mask:
- case X86::BI__builtin_ia32_cmpph256_mask:
- case X86::BI__builtin_ia32_cmpph512_mask:
- case X86::BI__builtin_ia32_cmpps128_mask:
- case X86::BI__builtin_ia32_cmpps256_mask:
- case X86::BI__builtin_ia32_cmpps512_mask:
- case X86::BI__builtin_ia32_cmppd128_mask:
- case X86::BI__builtin_ia32_cmppd256_mask:
- case X86::BI__builtin_ia32_cmppd512_mask:
- case X86::BI__builtin_ia32_vcmpbf16512_mask:
- case X86::BI__builtin_ia32_vcmpbf16256_mask:
- case X86::BI__builtin_ia32_vcmpbf16128_mask:
- case X86::BI__builtin_ia32_cmpps:
- case X86::BI__builtin_ia32_cmpps256:
- case X86::BI__builtin_ia32_cmppd:
- case X86::BI__builtin_ia32_cmppd256:
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpordss:
- case X86::BI__builtin_ia32_cmpeqsd:
- case X86::BI__builtin_ia32_cmpltsd:
- case X86::BI__builtin_ia32_cmplesd:
- case X86::BI__builtin_ia32_cmpunordsd:
- case X86::BI__builtin_ia32_cmpneqsd:
- case X86::BI__builtin_ia32_cmpnltsd:
- case X86::BI__builtin_ia32_cmpnlesd:
- case X86::BI__builtin_ia32_cmpordsd:
- case X86::BI__builtin_ia32_vcvtph2ps_mask:
- case X86::BI__builtin_ia32_vcvtph2ps256_mask:
- case X86::BI__builtin_ia32_vcvtph2ps512_mask:
- case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
- case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
- case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
- case X86::BI__cpuid:
- case X86::BI__cpuidex:
- case X86::BI__emul:
- case X86::BI__emulu:
- case X86::BI__mulh:
- case X86::BI__umulh:
- case X86::BI_mul128:
- case X86::BI_umul128:
- case X86::BI__faststorefence:
- case X86::BI__shiftleft128:
- case X86::BI__shiftright128:
- case X86::BI_ReadWriteBarrier:
- case X86::BI_ReadBarrier:
- case X86::BI_WriteBarrier:
- case X86::BI_AddressOfReturnAddress:
- case X86::BI__stosb:
- case X86::BI__ud2:
- case X86::BI__int2c:
- case X86::BI__readfsbyte:
- case X86::BI__readfsword:
- case X86::BI__readfsdword:
- case X86::BI__readfsqword:
- case X86::BI__readgsbyte:
- case X86::BI__readgsword:
- case X86::BI__readgsdword:
- case X86::BI__readgsqword:
- case X86::BI__builtin_ia32_encodekey128_u32:
- case X86::BI__builtin_ia32_encodekey256_u32:
- case X86::BI__builtin_ia32_aesenc128kl_u8:
- case X86::BI__builtin_ia32_aesdec128kl_u8:
- case X86::BI__builtin_ia32_aesenc256kl_u8:
- case X86::BI__builtin_ia32_aesdec256kl_u8:
- case X86::BI__builtin_ia32_aesencwide128kl_u8:
- case X86::BI__builtin_ia32_aesdecwide128kl_u8:
- case X86::BI__builtin_ia32_aesencwide256kl_u8:
- case X86::BI__builtin_ia32_aesdecwide256kl_u8:
- case X86::BI__builtin_ia32_vfcmaddcph512_mask:
- case X86::BI__builtin_ia32_vfmaddcph512_mask:
- case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
- case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
- case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
- case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
- case X86::BI__builtin_ia32_prefetchi:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- }
-}
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to emit x86/x86_64 builtin calls as CIR or as a
+// function call to be resolved later.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CIRGenBuilder.h"
+#include "CIRGenFunction.h"
+#include "CIRGenModule.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/ValueRange.h"
+#include "clang/Basic/Builtins.h"
+#include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
+#include "clang/CIR/MissingFeatures.h"
+
+using namespace clang;
+using namespace clang::CIRGen;
+
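+
+// Convenience wrapper: builds an LLVMIntrinsicCallOp for the named LLVM
+// intrinsic (e.g. "x86.sse2.clflush") and returns its result value.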
+template <typename... Operands>
+static mlir::Value emitIntrinsicCallOp(CIRGenBuilderTy &builder,
+ mlir::Location loc, const StringRef str,
+ const mlir::Type &resTy,
+ Operands &&...op) {
+ return cir::LLVMIntrinsicCallOp::create(builder, loc,
+ builder.getStringAttr(str), resTy,
+ std::forward<Operands>(op)...)
+ .getResult();
+}
+
+// OG (the original LLVM codegen) supports unordered comparisons in addition
+// to ordered ones as an optimization, while CIR doesn't.
+//
+// This means that we can't encode the comparison code of UGT (unordered
+// greater than), at least not at the CIR level.
+//
+// The boolean shouldInvert compensates for this.
+// For example: to get to the comparison code UGT, we pass in
+// emitVectorFCmp (OLE, shouldInvert = true) since OLE is the inverse of UGT.
+
+// There are several ways to support this otherwise:
+// - register extra CmpOpKind for unordered comparison types and build the
+//   translation code to go from CIR -> LLVM dialect. Notice we get this
+//   naturally with shouldInvert, benefiting from existing infrastructure,
+//   albeit having to generate an extra `not` at the CIR level.
+// - Just add extra comparison code to a new VecCmpOpKind instead of
+// cluttering CmpOpKind.
+// - Add a boolean in VecCmpOp to indicate if it's doing unordered or ordered
+// comparison
+// - Just emit the intrinsics call instead of calling this helper, see how the
+// LLVM lowering handles this.
+static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
+ llvm::SmallVector<mlir::Value> &ops,
+ mlir::Location loc, cir::CmpOpKind pred,
+ bool shouldInvert) {
+ assert(!cir::MissingFeatures::cgFPOptionsRAII());
+  // TODO(cir): Add an isSignaling boolean once emitConstrainedFPCall is
+  // implemented.
+ assert(!cir::MissingFeatures::emitConstrainedFPCall());
+ mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]);
+ mlir::Value bitCast = builder.createBitcast(
+ shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType());
+ return bitCast;
+}
+
+static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc,
+ mlir::Value mask, unsigned numElems) {
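+  // Reinterpret the scalar iN mask as a vector of N 1-bit elements.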
+ auto maskTy = cir::VectorType::get(
+ builder.getUIntNTy(1), cast<cir::IntType>(mask.getType()).getWidth());
+ mlir::Value maskVec = builder.createBitcast(mask, maskTy);
+
+  // If we have fewer than 8 elements, the starting mask was an i8 and we
+  // need to extract down to the right number of elements.
+ if (numElems < 8) {
+ SmallVector<mlir::Attribute, 4> indices;
+ mlir::Type i32Ty = builder.getSInt32Ty();
+ for (auto i : llvm::seq<unsigned>(0, numElems))
+ indices.push_back(cir::IntAttr::get(i32Ty, i));
+
+ maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices);
+ }
+ return maskVec;
+}
+
+// Builds the VecShuffleOp for pshuflw and pshufhw x86 builtins.
+//
+// The vector is split into 128-bit lanes of 8 word (16-bit) elements. The
+// lower or upper half of each lane, selected by `isLow`, is shuffled as
+// follows: the immediate is truncated to 8 bits and split into four 2-bit
+// fields, where the i-th field selects which source element of the half-lane
+// lands in the i-th result slot. The other half of the lane is unchanged.
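+//
+// For example, imm = 0x1B (0b00011011) reverses the selected half of each
+// lane: slot 0 takes source element 3, slot 1 takes element 2, slot 2 takes
+// element 1, and slot 3 takes element 0.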
+static cir::VecShuffleOp emitPshufWord(CIRGenBuilderTy &builder,
+ const mlir::Value vec,
+ const mlir::Value immediate,
+ const mlir::Location loc,
+ const bool isLow) {
+ uint32_t imm = CIRGenFunction::getZExtIntValueFromConstOp(immediate);
+
+ auto vecTy = cast<cir::VectorType>(vec.getType());
+ unsigned numElts = vecTy.getSize();
+
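+  // The shuffled half of each 8-element lane starts at element 0 (pshuflw) or
+  // element 4 (pshufhw); the untouched half starts at the complementary
+  // offset.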
+ unsigned firstHalfStart = isLow ? 0 : 4;
+ unsigned secondHalfStart = 4 - firstHalfStart;
+
+  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
+ imm = (imm & 0xff) * 0x01010101;
+
+ int64_t indices[32];
+ for (unsigned l = 0; l != numElts; l += 8) {
+ for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) {
+ indices[l + i] = l + (imm & 3) + firstHalfStart;
+ imm >>= 2;
+ }
+ for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i)
+ indices[l + i] = l + i;
+ }
+
+ return builder.createVecShuffle(loc, vec, ArrayRef(indices, numElts));
+}
+
+// Builds the shuffle mask for pshufd and shufpd/shufps x86 builtins.
+// The shuffle mask is written to outIndices.
+static void
+computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec,
+ uint32_t imm, const bool isShufP,
+ llvm::SmallVectorImpl<int64_t> &outIndices) {
+ auto vecTy = cast<cir::VectorType>(vec.getType());
+ unsigned numElts = vecTy.getSize();
+ unsigned numLanes = cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128;
+ unsigned numLaneElts = numElts / numLanes;
+
+  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
+ imm = (imm & 0xff) * 0x01010101;
+
+ for (unsigned l = 0; l != numElts; l += numLaneElts) {
+ for (unsigned i = 0; i != numLaneElts; ++i) {
+ uint32_t idx = imm % numLaneElts;
+ imm /= numLaneElts;
+ if (isShufP && i >= (numLaneElts / 2))
+ idx += numElts;
+ outIndices[l + i] = l + idx;
+ }
+ }
+
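+  // The caller over-allocates the index buffer, so trim it down to the number
+  // of elements actually written.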
+ outIndices.resize(numElts);
+}
+
+static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
+ mlir::Location loc,
+ const std::string &intrinsicName,
+ SmallVectorImpl<mlir::Value> &ops) {
+ auto intTy = cast<cir::IntType>(ops[0].getType());
+ unsigned numElts = intTy.getWidth();
+ mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
+ mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
+ mlir::Type vecTy = lhsVec.getType();
+ mlir::Value resVec = emitIntrinsicCallOp(builder, loc, intrinsicName, vecTy,
+ mlir::ValueRange{lhsVec, rhsVec});
+ return builder.createBitcast(resVec, ops[0].getType());
+}
+
+static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder,
+ mlir::Location loc,
+ const std::string &intrinsicName,
+ SmallVectorImpl<mlir::Value> &ops) {
+ unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
+
+ // Convert both operands to mask vectors.
+ mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems);
+ mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems);
+
+ mlir::Type i32Ty = builder.getSInt32Ty();
+
+ // Create indices for extracting the first half of each vector.
+ SmallVector<mlir::Attribute, 32> halfIndices;
+ for (auto i : llvm::seq<unsigned>(0, numElems / 2))
+ halfIndices.push_back(cir::IntAttr::get(i32Ty, i));
+
+ // Extract first half of each vector. This gives better codegen than
+ // doing it in a single shuffle.
+ mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices);
+ mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices);
+
+ // Create indices for concatenating the vectors.
+ // NOTE: Operands are swapped to match the intrinsic definition.
+ // After the half extraction, both vectors have numElems/2 elements.
+ // In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1]
+ // select from rhsHalf, and indices [numElems/2..numElems-1] select from
+ // lhsHalf.
+ SmallVector<mlir::Attribute, 64> concatIndices;
+ for (auto i : llvm::seq<unsigned>(0, numElems))
+ concatIndices.push_back(cir::IntAttr::get(i32Ty, i));
+
+ // Concat the vectors (RHS first, then LHS).
+ mlir::Value res =
+ builder.createVecShuffle(loc, rhsHalf, lhsHalf, concatIndices);
+ return builder.createBitcast(res, ops[0].getType());
+}
+
+static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
+ mlir::Location loc,
+ cir::BinOpKind binOpKind,
+ SmallVectorImpl<mlir::Value> &ops,
+ bool invertLHS = false) {
+ unsigned numElts = cast<cir::IntType>(ops[0].getType()).getWidth();
+ mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElts);
+ mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElts);
+
+ if (invertLHS)
+ lhs = builder.createNot(lhs);
+ return builder.createBitcast(builder.createBinop(loc, lhs, binOpKind, rhs),
+ ops[0].getType());
+}
+
+static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc,
+ const std::string &intrinsicName,
+ SmallVectorImpl<mlir::Value> &ops) {
+ auto intTy = cast<cir::IntType>(ops[0].getType());
+ unsigned numElts = intTy.getWidth();
+ mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
+ mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
+ mlir::Type resTy = builder.getSInt32Ty();
+ return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy,
+ mlir::ValueRange{lhsVec, rhsVec});
+}
+
+static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
+ mlir::Value vec, mlir::Value value,
+ mlir::Value indexOp) {
+ unsigned numElts = cast<cir::VectorType>(vec.getType()).getSize();
+
+ uint64_t index =
+ indexOp.getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
+
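+  // numElts is a power of two, so this mask wraps an out-of-range index into
+  // [0, numElts), matching the behavior of the underlying instructions.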
+ index &= numElts - 1;
+
+ cir::ConstantOp indexVal = builder.getUInt64(index, loc);
+
+ return cir::VecInsertOp::create(builder, loc, vec, value, indexVal);
+}
+
+static mlir::Value emitX86FunnelShift(CIRGenBuilderTy &builder,
+ mlir::Location location, mlir::Value &op0,
+ mlir::Value &op1, mlir::Value &amt,
+ bool isRight) {
+ mlir::Type op0Ty = op0.getType();
+
+  // The amount may be a scalar immediate, in which case we create a splat
+  // vector. Funnel shift amounts are taken modulo the bit width, and the
+  // types are all power-of-2 sized, so we only care about the lowest log2
+  // bits anyway.
+ if (amt.getType() != op0Ty) {
+ auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
+ uint64_t numElems = vecTy.getSize();
+
+ auto amtTy = mlir::cast<cir::IntType>(amt.getType());
+ auto vecElemTy = mlir::cast<cir::IntType>(vecTy.getElementType());
+
+    // If signed, cast to the same width but unsigned first to ensure
+    // zero-extension when casting to the bigger unsigned `vecElemTy`.
+ if (amtTy.isSigned()) {
+ cir::IntType unsignedAmtTy = builder.getUIntNTy(amtTy.getWidth());
+ amt = builder.createIntCast(amt, unsignedAmtTy);
+ }
+ cir::IntType unsignedVecElemType = builder.getUIntNTy(vecElemTy.getWidth());
+ amt = builder.createIntCast(amt, unsignedVecElemType);
+ amt = cir::VecSplatOp::create(
+ builder, location, cir::VectorType::get(unsignedVecElemType, numElems),
+ amt);
+ }
+
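+  // fshl/fshr shift the concatenation of op0 (high) and op1 (low); when both
+  // operands are the same value, as for the prol/pror rotates above, this
+  // degenerates to a rotate.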
+ const StringRef intrinsicName = isRight ? "fshr" : "fshl";
+ return emitIntrinsicCallOp(builder, location, intrinsicName, op0Ty,
+ mlir::ValueRange{op0, op1, amt});
+}
+
+static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
+ bool isSigned,
+ SmallVectorImpl<mlir::Value> &ops,
+ unsigned opTypePrimitiveSizeInBits) {
+ mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(),
+ opTypePrimitiveSizeInBits / 64);
+ mlir::Value lhs = builder.createBitcast(loc, ops[0], ty);
+ mlir::Value rhs = builder.createBitcast(loc, ops[1], ty);
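+  // pmuldq/pmuludq multiply the low 32 bits of each 64-bit lane, so sign- or
+  // zero-extend those low halves before the full 64-bit multiply.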
+ if (isSigned) {
+ cir::ConstantOp shiftAmt =
+ builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32));
+ cir::VecSplatOp shiftSplatVecOp =
+ cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult());
+ mlir::Value shiftSplatValue = shiftSplatVecOp.getResult();
+ // In CIR, right-shift operations are automatically lowered to either an
+ // arithmetic or logical shift depending on the operand type. The purpose
+ // of the shifts here is to propagate the sign bit of the 32-bit input
+ // into the upper bits of each vector lane.
+ lhs = builder.createShift(loc, lhs, shiftSplatValue, true);
+ lhs = builder.createShift(loc, lhs, shiftSplatValue, false);
+ rhs = builder.createShift(loc, rhs, shiftSplatValue, true);
+ rhs = builder.createShift(loc, rhs, shiftSplatValue, false);
+ } else {
+ cir::ConstantOp maskScalar = builder.getConstant(
+ loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff));
+ cir::VecSplatOp mask =
+ cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult());
+    // Clear the upper 32 bits of each 64-bit lane.
+ lhs = builder.createAnd(loc, lhs, mask);
+ rhs = builder.createAnd(loc, rhs, mask);
+ }
+ return builder.createMul(loc, lhs, rhs);
+}
+
+static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
+ llvm::SmallVector<mlir::Value> ops,
+ bool isSigned) {
+ mlir::Value op0 = ops[0];
+ mlir::Value op1 = ops[1];
+
+ cir::VectorType ty = cast<cir::VectorType>(op0.getType());
+ cir::IntType elementTy = cast<cir::IntType>(ty.getElementType());
+
+ uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7;
+
+ cir::CmpOpKind pred;
+ switch (imm) {
+ case 0x0:
+ pred = cir::CmpOpKind::lt;
+ break;
+ case 0x1:
+ pred = cir::CmpOpKind::le;
+ break;
+ case 0x2:
+ pred = cir::CmpOpKind::gt;
+ break;
+ case 0x3:
+ pred = cir::CmpOpKind::ge;
+ break;
+ case 0x4:
+ pred = cir::CmpOpKind::eq;
+ break;
+ case 0x5:
+ pred = cir::CmpOpKind::ne;
+ break;
+ case 0x6:
+ return builder.getNullValue(ty, loc); // FALSE
+ case 0x7: {
+ llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth());
+ return cir::VecSplatOp::create(
+ builder, loc, ty,
+ builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE
+ }
+ default:
+ llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
+ }
+
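+  // The comparison must use the signedness requested by the builtin; if the
+  // element type disagrees, bitcast the operands to the same-width type with
+  // the opposite signedness.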
+ if ((!isSigned && elementTy.isSigned()) ||
+ (isSigned && elementTy.isUnsigned())) {
+ elementTy = elementTy.isSigned() ? builder.getUIntNTy(elementTy.getWidth())
+ : builder.getSIntNTy(elementTy.getWidth());
+ ty = cir::VectorType::get(elementTy, ty.getSize());
+ op0 = builder.createBitcast(op0, ty);
+ op1 = builder.createBitcast(op1, ty);
+ }
+
+ return builder.createVecCompare(loc, pred, op0, op1);
+}
+
+mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
+ const CallExpr *expr) {
+ if (builtinID == Builtin::BI__builtin_cpu_is) {
+ cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
+ return {};
+ }
+ if (builtinID == Builtin::BI__builtin_cpu_supports) {
+ cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports");
+ return {};
+ }
+ if (builtinID == Builtin::BI__builtin_cpu_init) {
+ cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init");
+ return {};
+ }
+
+ // Handle MSVC intrinsics before argument evaluation to prevent double
+ // evaluation.
+ assert(!cir::MissingFeatures::msvcBuiltins());
+
+ // Find out if any arguments are required to be integer constant expressions.
+ assert(!cir::MissingFeatures::handleBuiltinICEArguments());
+
+  // The operands of the builtin call.
+ llvm::SmallVector<mlir::Value> ops;
+
+  // `iceArguments` is a bitmap whose i-th bit indicates whether the i-th
+  // argument is required to be an integer constant expression.
+ unsigned iceArguments = 0;
+ ASTContext::GetBuiltinTypeError error;
+ getContext().GetBuiltinType(builtinID, error, &iceArguments);
+ assert(error == ASTContext::GE_None && "Error while getting builtin type.");
+
+ for (auto [idx, arg] : llvm::enumerate(expr->arguments()))
+ ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg));
+
+ CIRGenBuilderTy &builder = getBuilder();
+ mlir::Type voidTy = builder.getVoidTy();
+
+ switch (builtinID) {
+ default:
+ return {};
+ case X86::BI_mm_clflush:
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ "x86.sse2.clflush", voidTy, ops[0]);
+ case X86::BI_mm_lfence:
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ "x86.sse2.lfence", voidTy);
+ case X86::BI_mm_pause:
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ "x86.sse2.pause", voidTy);
+ case X86::BI_mm_mfence:
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ "x86.sse2.mfence", voidTy);
+ case X86::BI_mm_sfence:
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ "x86.sse.sfence", voidTy);
+ case X86::BI_mm_prefetch:
+ case X86::BI__rdtsc:
+ case X86::BI__builtin_ia32_rdtscp: {
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ }
+ case X86::BI__builtin_ia32_lzcnt_u16:
+ case X86::BI__builtin_ia32_lzcnt_u32:
+ case X86::BI__builtin_ia32_lzcnt_u64: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
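+    // lzcnt is well-defined for a zero input (it returns the operand width),
+    // so the ctlz intrinsic must not treat zero as poison.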
+ mlir::Value isZeroPoison = builder.getFalse(loc);
+ return emitIntrinsicCallOp(builder, loc, "ctlz", ops[0].getType(),
+ mlir::ValueRange{ops[0], isZeroPoison});
+ }
+ case X86::BI__builtin_ia32_tzcnt_u16:
+ case X86::BI__builtin_ia32_tzcnt_u32:
+ case X86::BI__builtin_ia32_tzcnt_u64: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ mlir::Value isZeroPoison = builder.getFalse(loc);
+ return emitIntrinsicCallOp(builder, loc, "cttz", ops[0].getType(),
+ mlir::ValueRange{ops[0], isZeroPoison});
+ }
+ case X86::BI__builtin_ia32_undef128:
+ case X86::BI__builtin_ia32_undef256:
+ case X86::BI__builtin_ia32_undef512:
+ // The x86 definition of "undef" is not the same as the LLVM definition
+ // (PR32176). We leave optimizing away an unnecessary zero constant to the
+ // IR optimizer and backend.
+ // TODO: If we had a "freeze" IR instruction to generate a fixed undef
+ // value, we should use that here instead of a zero.
+ return builder.getNullValue(convertType(expr->getType()),
+ getLoc(expr->getExprLoc()));
+ case X86::BI__builtin_ia32_vec_ext_v4hi:
+ case X86::BI__builtin_ia32_vec_ext_v16qi:
+ case X86::BI__builtin_ia32_vec_ext_v8hi:
+ case X86::BI__builtin_ia32_vec_ext_v4si:
+ case X86::BI__builtin_ia32_vec_ext_v4sf:
+ case X86::BI__builtin_ia32_vec_ext_v2di:
+ case X86::BI__builtin_ia32_vec_ext_v32qi:
+ case X86::BI__builtin_ia32_vec_ext_v16hi:
+ case X86::BI__builtin_ia32_vec_ext_v8si:
+ case X86::BI__builtin_ia32_vec_ext_v4di: {
+ unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+
+ uint64_t index = getZExtIntValueFromConstOp(ops[1]);
+ index &= numElts - 1;
+
+ cir::ConstantOp indexVal =
+ builder.getUInt64(index, getLoc(expr->getExprLoc()));
+
+ // These builtins exist so we can ensure the index is an ICE and in range.
+ // Otherwise we could just do this in the header file.
+ return cir::VecExtractOp::create(builder, getLoc(expr->getExprLoc()),
+ ops[0], indexVal);
+ }
+ case X86::BI__builtin_ia32_vec_set_v4hi:
+ case X86::BI__builtin_ia32_vec_set_v16qi:
+ case X86::BI__builtin_ia32_vec_set_v8hi:
+ case X86::BI__builtin_ia32_vec_set_v4si:
+ case X86::BI__builtin_ia32_vec_set_v2di:
+ case X86::BI__builtin_ia32_vec_set_v32qi:
+ case X86::BI__builtin_ia32_vec_set_v16hi:
+ case X86::BI__builtin_ia32_vec_set_v8si:
+ case X86::BI__builtin_ia32_vec_set_v4di: {
+ return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1],
+ ops[2]);
+ }
+ case X86::BI__builtin_ia32_kunpckhi:
+ return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kunpackb", ops);
+ case X86::BI__builtin_ia32_kunpcksi:
+ return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kunpackw", ops);
+ case X86::BI__builtin_ia32_kunpckdi:
+ return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kunpackd", ops);
+ case X86::BI_mm_setcsr:
+ case X86::BI__builtin_ia32_ldmxcsr: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
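+    // The ldmxcsr intrinsic reads from memory, so spill the i32 operand to a
+    // stack temporary and pass its address.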
+ Address tmp = createMemTemp(expr->getArg(0)->getType(), loc);
+ builder.createStore(loc, ops[0], tmp);
+ return emitIntrinsicCallOp(builder, loc, "x86.sse.ldmxcsr",
+ builder.getVoidTy(), tmp.getPointer());
+ }
+ case X86::BI_mm_getcsr:
+ case X86::BI__builtin_ia32_stmxcsr: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
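+    // The stmxcsr intrinsic writes to memory: give it a temporary to store
+    // into, then load the value back as the result.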
+ Address tmp = createMemTemp(expr->getType(), loc);
+ emitIntrinsicCallOp(builder, loc, "x86.sse.stmxcsr", builder.getVoidTy(),
+ tmp.getPointer());
+ return builder.createLoad(loc, tmp);
+ }
+ case X86::BI__builtin_ia32_xsave:
+ case X86::BI__builtin_ia32_xsave64:
+ case X86::BI__builtin_ia32_xrstor:
+ case X86::BI__builtin_ia32_xrstor64:
+ case X86::BI__builtin_ia32_xsaveopt:
+ case X86::BI__builtin_ia32_xsaveopt64:
+ case X86::BI__builtin_ia32_xrstors:
+ case X86::BI__builtin_ia32_xrstors64:
+ case X86::BI__builtin_ia32_xsavec:
+ case X86::BI__builtin_ia32_xsavec64:
+ case X86::BI__builtin_ia32_xsaves:
+ case X86::BI__builtin_ia32_xsaves64:
+ case X86::BI__builtin_ia32_xsetbv:
+ case X86::BI_xsetbv:
+ case X86::BI__builtin_ia32_xgetbv:
+ case X86::BI_xgetbv:
+ case X86::BI__builtin_ia32_storedqudi128_mask:
+ case X86::BI__builtin_ia32_storedqusi128_mask:
+ case X86::BI__builtin_ia32_storedquhi128_mask:
+ case X86::BI__builtin_ia32_storedquqi128_mask:
+ case X86::BI__builtin_ia32_storeupd128_mask:
+ case X86::BI__builtin_ia32_storeups128_mask:
+ case X86::BI__builtin_ia32_storedqudi256_mask:
+ case X86::BI__builtin_ia32_storedqusi256_mask:
+ case X86::BI__builtin_ia32_storedquhi256_mask:
+ case X86::BI__builtin_ia32_storedquqi256_mask:
+ case X86::BI__builtin_ia32_storeupd256_mask:
+ case X86::BI__builtin_ia32_storeups256_mask:
+ case X86::BI__builtin_ia32_storedqudi512_mask:
+ case X86::BI__builtin_ia32_storedqusi512_mask:
+ case X86::BI__builtin_ia32_storedquhi512_mask:
+ case X86::BI__builtin_ia32_storedquqi512_mask:
+ case X86::BI__builtin_ia32_storeupd512_mask:
+ case X86::BI__builtin_ia32_storeups512_mask:
+ case X86::BI__builtin_ia32_storesbf16128_mask:
+ case X86::BI__builtin_ia32_storesh128_mask:
+ case X86::BI__builtin_ia32_storess128_mask:
+ case X86::BI__builtin_ia32_storesd128_mask:
+ case X86::BI__builtin_ia32_cvtmask2b128:
+ case X86::BI__builtin_ia32_cvtmask2b256:
+ case X86::BI__builtin_ia32_cvtmask2b512:
+ case X86::BI__builtin_ia32_cvtmask2w128:
+ case X86::BI__builtin_ia32_cvtmask2w256:
+ case X86::BI__builtin_ia32_cvtmask2w512:
+ case X86::BI__builtin_ia32_cvtmask2d128:
+ case X86::BI__builtin_ia32_cvtmask2d256:
+ case X86::BI__builtin_ia32_cvtmask2d512:
+ case X86::BI__builtin_ia32_cvtmask2q128:
+ case X86::BI__builtin_ia32_cvtmask2q256:
+ case X86::BI__builtin_ia32_cvtmask2q512:
+ case X86::BI__builtin_ia32_cvtb2mask128:
+ case X86::BI__builtin_ia32_cvtb2mask256:
+ case X86::BI__builtin_ia32_cvtb2mask512:
+ case X86::BI__builtin_ia32_cvtw2mask128:
+ case X86::BI__builtin_ia32_cvtw2mask256:
+ case X86::BI__builtin_ia32_cvtw2mask512:
+ case X86::BI__builtin_ia32_cvtd2mask128:
+ case X86::BI__builtin_ia32_cvtd2mask256:
+ case X86::BI__builtin_ia32_cvtd2mask512:
+ case X86::BI__builtin_ia32_cvtq2mask128:
+ case X86::BI__builtin_ia32_cvtq2mask256:
+ case X86::BI__builtin_ia32_cvtq2mask512:
+ case X86::BI__builtin_ia32_cvtdq2ps512_mask:
+ case X86::BI__builtin_ia32_cvtqq2ps512_mask:
+ case X86::BI__builtin_ia32_cvtqq2pd512_mask:
+ case X86::BI__builtin_ia32_vcvtw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
+ case X86::BI__builtin_ia32_cvtudq2ps512_mask:
+ case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
+ case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
+ case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
+ case X86::BI__builtin_ia32_vfmaddsh3_mask:
+ case X86::BI__builtin_ia32_vfmaddss3_mask:
+ case X86::BI__builtin_ia32_vfmaddsd3_mask:
+ case X86::BI__builtin_ia32_vfmaddsh3_maskz:
+ case X86::BI__builtin_ia32_vfmaddss3_maskz:
+ case X86::BI__builtin_ia32_vfmaddsd3_maskz:
+ case X86::BI__builtin_ia32_vfmaddsh3_mask3:
+ case X86::BI__builtin_ia32_vfmaddss3_mask3:
+ case X86::BI__builtin_ia32_vfmaddsd3_mask3:
+ case X86::BI__builtin_ia32_vfmsubsh3_mask3:
+ case X86::BI__builtin_ia32_vfmsubss3_mask3:
+ case X86::BI__builtin_ia32_vfmsubsd3_mask3:
+ case X86::BI__builtin_ia32_vfmaddph512_mask:
+ case X86::BI__builtin_ia32_vfmaddph512_maskz:
+ case X86::BI__builtin_ia32_vfmaddph512_mask3:
+ case X86::BI__builtin_ia32_vfmaddps512_mask:
+ case X86::BI__builtin_ia32_vfmaddps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubph512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubph512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+ case X86::BI__builtin_ia32_movdqa32store128_mask:
+ case X86::BI__builtin_ia32_movdqa64store128_mask:
+ case X86::BI__builtin_ia32_storeaps128_mask:
+ case X86::BI__builtin_ia32_storeapd128_mask:
+ case X86::BI__builtin_ia32_movdqa32store256_mask:
+ case X86::BI__builtin_ia32_movdqa64store256_mask:
+ case X86::BI__builtin_ia32_storeaps256_mask:
+ case X86::BI__builtin_ia32_storeapd256_mask:
+ case X86::BI__builtin_ia32_movdqa32store512_mask:
+ case X86::BI__builtin_ia32_movdqa64store512_mask:
+ case X86::BI__builtin_ia32_storeaps512_mask:
+ case X86::BI__builtin_ia32_storeapd512_mask:
+ case X86::BI__builtin_ia32_loadups128_mask:
+ case X86::BI__builtin_ia32_loadups256_mask:
+ case X86::BI__builtin_ia32_loadups512_mask:
+ case X86::BI__builtin_ia32_loadupd128_mask:
+ case X86::BI__builtin_ia32_loadupd256_mask:
+ case X86::BI__builtin_ia32_loadupd512_mask:
+ case X86::BI__builtin_ia32_loaddquqi128_mask:
+ case X86::BI__builtin_ia32_loaddquqi256_mask:
+ case X86::BI__builtin_ia32_loaddquqi512_mask:
+ case X86::BI__builtin_ia32_loaddquhi128_mask:
+ case X86::BI__builtin_ia32_loaddquhi256_mask:
+ case X86::BI__builtin_ia32_loaddquhi512_mask:
+ case X86::BI__builtin_ia32_loaddqusi128_mask:
+ case X86::BI__builtin_ia32_loaddqusi256_mask:
+ case X86::BI__builtin_ia32_loaddqusi512_mask:
+ case X86::BI__builtin_ia32_loaddqudi128_mask:
+ case X86::BI__builtin_ia32_loaddqudi256_mask:
+ case X86::BI__builtin_ia32_loaddqudi512_mask:
+ case X86::BI__builtin_ia32_loadsbf16128_mask:
+ case X86::BI__builtin_ia32_loadsh128_mask:
+ case X86::BI__builtin_ia32_loadss128_mask:
+ case X86::BI__builtin_ia32_loadsd128_mask:
+ case X86::BI__builtin_ia32_loadaps128_mask:
+ case X86::BI__builtin_ia32_loadaps256_mask:
+ case X86::BI__builtin_ia32_loadaps512_mask:
+ case X86::BI__builtin_ia32_loadapd128_mask:
+ case X86::BI__builtin_ia32_loadapd256_mask:
+ case X86::BI__builtin_ia32_loadapd512_mask:
+ case X86::BI__builtin_ia32_movdqa32load128_mask:
+ case X86::BI__builtin_ia32_movdqa32load256_mask:
+ case X86::BI__builtin_ia32_movdqa32load512_mask:
+ case X86::BI__builtin_ia32_movdqa64load128_mask:
+ case X86::BI__builtin_ia32_movdqa64load256_mask:
+ case X86::BI__builtin_ia32_movdqa64load512_mask:
+ case X86::BI__builtin_ia32_expandloaddf128_mask:
+ case X86::BI__builtin_ia32_expandloaddf256_mask:
+ case X86::BI__builtin_ia32_expandloaddf512_mask:
+ case X86::BI__builtin_ia32_expandloadsf128_mask:
+ case X86::BI__builtin_ia32_expandloadsf256_mask:
+ case X86::BI__builtin_ia32_expandloadsf512_mask:
+ case X86::BI__builtin_ia32_expandloaddi128_mask:
+ case X86::BI__builtin_ia32_expandloaddi256_mask:
+ case X86::BI__builtin_ia32_expandloaddi512_mask:
+ case X86::BI__builtin_ia32_expandloadsi128_mask:
+ case X86::BI__builtin_ia32_expandloadsi256_mask:
+ case X86::BI__builtin_ia32_expandloadsi512_mask:
+ case X86::BI__builtin_ia32_expandloadhi128_mask:
+ case X86::BI__builtin_ia32_expandloadhi256_mask:
+ case X86::BI__builtin_ia32_expandloadhi512_mask:
+ case X86::BI__builtin_ia32_expandloadqi128_mask:
+ case X86::BI__builtin_ia32_expandloadqi256_mask:
+ case X86::BI__builtin_ia32_expandloadqi512_mask:
+ case X86::BI__builtin_ia32_compressstoredf128_mask:
+ case X86::BI__builtin_ia32_compressstoredf256_mask:
+ case X86::BI__builtin_ia32_compressstoredf512_mask:
+ case X86::BI__builtin_ia32_compressstoresf128_mask:
+ case X86::BI__builtin_ia32_compressstoresf256_mask:
+ case X86::BI__builtin_ia32_compressstoresf512_mask:
+ case X86::BI__builtin_ia32_compressstoredi128_mask:
+ case X86::BI__builtin_ia32_compressstoredi256_mask:
+ case X86::BI__builtin_ia32_compressstoredi512_mask:
+ case X86::BI__builtin_ia32_compressstoresi128_mask:
+ case X86::BI__builtin_ia32_compressstoresi256_mask:
+ case X86::BI__builtin_ia32_compressstoresi512_mask:
+ case X86::BI__builtin_ia32_compressstorehi128_mask:
+ case X86::BI__builtin_ia32_compressstorehi256_mask:
+ case X86::BI__builtin_ia32_compressstorehi512_mask:
+ case X86::BI__builtin_ia32_compressstoreqi128_mask:
+ case X86::BI__builtin_ia32_compressstoreqi256_mask:
+ case X86::BI__builtin_ia32_compressstoreqi512_mask:
+ case X86::BI__builtin_ia32_expanddf128_mask:
+ case X86::BI__builtin_ia32_expanddf256_mask:
+ case X86::BI__builtin_ia32_expanddf512_mask:
+ case X86::BI__builtin_ia32_expandsf128_mask:
+ case X86::BI__builtin_ia32_expandsf256_mask:
+ case X86::BI__builtin_ia32_expandsf512_mask:
+ case X86::BI__builtin_ia32_expanddi128_mask:
+ case X86::BI__builtin_ia32_expanddi256_mask:
+ case X86::BI__builtin_ia32_expanddi512_mask:
+ case X86::BI__builtin_ia32_expandsi128_mask:
+ case X86::BI__builtin_ia32_expandsi256_mask:
+ case X86::BI__builtin_ia32_expandsi512_mask:
+ case X86::BI__builtin_ia32_expandhi128_mask:
+ case X86::BI__builtin_ia32_expandhi256_mask:
+ case X86::BI__builtin_ia32_expandhi512_mask:
+ case X86::BI__builtin_ia32_expandqi128_mask:
+ case X86::BI__builtin_ia32_expandqi256_mask:
+ case X86::BI__builtin_ia32_expandqi512_mask:
+ case X86::BI__builtin_ia32_compressdf128_mask:
+ case X86::BI__builtin_ia32_compressdf256_mask:
+ case X86::BI__builtin_ia32_compressdf512_mask:
+ case X86::BI__builtin_ia32_compresssf128_mask:
+ case X86::BI__builtin_ia32_compresssf256_mask:
+ case X86::BI__builtin_ia32_compresssf512_mask:
+ case X86::BI__builtin_ia32_compressdi128_mask:
+ case X86::BI__builtin_ia32_compressdi256_mask:
+ case X86::BI__builtin_ia32_compressdi512_mask:
+ case X86::BI__builtin_ia32_compresssi128_mask:
+ case X86::BI__builtin_ia32_compresssi256_mask:
+ case X86::BI__builtin_ia32_compresssi512_mask:
+ case X86::BI__builtin_ia32_compresshi128_mask:
+ case X86::BI__builtin_ia32_compresshi256_mask:
+ case X86::BI__builtin_ia32_compresshi512_mask:
+ case X86::BI__builtin_ia32_compressqi128_mask:
+ case X86::BI__builtin_ia32_compressqi256_mask:
+ case X86::BI__builtin_ia32_compressqi512_mask:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ case X86::BI__builtin_ia32_gather3div2df:
+ case X86::BI__builtin_ia32_gather3div2di:
+ case X86::BI__builtin_ia32_gather3div4df:
+ case X86::BI__builtin_ia32_gather3div4di:
+ case X86::BI__builtin_ia32_gather3div4sf:
+ case X86::BI__builtin_ia32_gather3div4si:
+ case X86::BI__builtin_ia32_gather3div8sf:
+ case X86::BI__builtin_ia32_gather3div8si:
+ case X86::BI__builtin_ia32_gather3siv2df:
+ case X86::BI__builtin_ia32_gather3siv2di:
+ case X86::BI__builtin_ia32_gather3siv4df:
+ case X86::BI__builtin_ia32_gather3siv4di:
+ case X86::BI__builtin_ia32_gather3siv4sf:
+ case X86::BI__builtin_ia32_gather3siv4si:
+ case X86::BI__builtin_ia32_gather3siv8sf:
+ case X86::BI__builtin_ia32_gather3siv8si:
+ case X86::BI__builtin_ia32_gathersiv8df:
+ case X86::BI__builtin_ia32_gathersiv16sf:
+ case X86::BI__builtin_ia32_gatherdiv8df:
+ case X86::BI__builtin_ia32_gatherdiv16sf:
+ case X86::BI__builtin_ia32_gathersiv8di:
+ case X86::BI__builtin_ia32_gathersiv16si:
+ case X86::BI__builtin_ia32_gatherdiv8di:
+ case X86::BI__builtin_ia32_gatherdiv16si: {
+ StringRef intrinsicName;
+ switch (builtinID) {
+ default:
+ llvm_unreachable("Unexpected builtin");
+ case X86::BI__builtin_ia32_gather3div2df:
+ intrinsicName = "x86.avx512.mask.gather3div2.df";
+ break;
+ case X86::BI__builtin_ia32_gather3div2di:
+ intrinsicName = "x86.avx512.mask.gather3div2.di";
+ break;
+ case X86::BI__builtin_ia32_gather3div4df:
+ intrinsicName = "x86.avx512.mask.gather3div4.df";
+ break;
+ case X86::BI__builtin_ia32_gather3div4di:
+ intrinsicName = "x86.avx512.mask.gather3div4.di";
+ break;
+ case X86::BI__builtin_ia32_gather3div4sf:
+ intrinsicName = "x86.avx512.mask.gather3div4.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3div4si:
+ intrinsicName = "x86.avx512.mask.gather3div4.si";
+ break;
+ case X86::BI__builtin_ia32_gather3div8sf:
+ intrinsicName = "x86.avx512.mask.gather3div8.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3div8si:
+ intrinsicName = "x86.avx512.mask.gather3div8.si";
+ break;
+ case X86::BI__builtin_ia32_gather3siv2df:
+ intrinsicName = "x86.avx512.mask.gather3siv2.df";
+ break;
+ case X86::BI__builtin_ia32_gather3siv2di:
+ intrinsicName = "x86.avx512.mask.gather3siv2.di";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4df:
+ intrinsicName = "x86.avx512.mask.gather3siv4.df";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4di:
+ intrinsicName = "x86.avx512.mask.gather3siv4.di";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4sf:
+ intrinsicName = "x86.avx512.mask.gather3siv4.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4si:
+ intrinsicName = "x86.avx512.mask.gather3siv4.si";
+ break;
+ case X86::BI__builtin_ia32_gather3siv8sf:
+ intrinsicName = "x86.avx512.mask.gather3siv8.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3siv8si:
+ intrinsicName = "x86.avx512.mask.gather3siv8.si";
+ break;
+ case X86::BI__builtin_ia32_gathersiv8df:
+ intrinsicName = "x86.avx512.mask.gather.dpd.512";
+ break;
+ case X86::BI__builtin_ia32_gathersiv16sf:
+ intrinsicName = "x86.avx512.mask.gather.dps.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv8df:
+ intrinsicName = "x86.avx512.mask.gather.qpd.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv16sf:
+ intrinsicName = "x86.avx512.mask.gather.qps.512";
+ break;
+ case X86::BI__builtin_ia32_gathersiv8di:
+ intrinsicName = "x86.avx512.mask.gather.dpq.512";
+ break;
+ case X86::BI__builtin_ia32_gathersiv16si:
+ intrinsicName = "x86.avx512.mask.gather.dpi.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv8di:
+ intrinsicName = "x86.avx512.mask.gather.qpq.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv16si:
+ intrinsicName = "x86.avx512.mask.gather.qpi.512";
+ break;
+ }
+
+ mlir::Location loc = getLoc(expr->getExprLoc());
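+    // The mask must be sized to the narrower of the passthrough and index
+    // vectors before it is handed to the gather intrinsic.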
+ unsigned minElts =
+ std::min(cast<cir::VectorType>(ops[0].getType()).getSize(),
+ cast<cir::VectorType>(ops[2].getType()).getSize());
+ ops[3] = getMaskVecValue(builder, loc, ops[3], minElts);
+ return emitIntrinsicCallOp(builder, loc, intrinsicName,
+ convertType(expr->getType()), ops);
+ }
+ case X86::BI__builtin_ia32_scattersiv8df:
+ case X86::BI__builtin_ia32_scattersiv16sf:
+ case X86::BI__builtin_ia32_scatterdiv8df:
+ case X86::BI__builtin_ia32_scatterdiv16sf:
+ case X86::BI__builtin_ia32_scattersiv8di:
+ case X86::BI__builtin_ia32_scattersiv16si:
+ case X86::BI__builtin_ia32_scatterdiv8di:
+ case X86::BI__builtin_ia32_scatterdiv16si:
+ case X86::BI__builtin_ia32_scatterdiv2df:
+ case X86::BI__builtin_ia32_scatterdiv2di:
+ case X86::BI__builtin_ia32_scatterdiv4df:
+ case X86::BI__builtin_ia32_scatterdiv4di:
+ case X86::BI__builtin_ia32_scatterdiv4sf:
+ case X86::BI__builtin_ia32_scatterdiv4si:
+ case X86::BI__builtin_ia32_scatterdiv8sf:
+ case X86::BI__builtin_ia32_scatterdiv8si:
+ case X86::BI__builtin_ia32_scattersiv2df:
+ case X86::BI__builtin_ia32_scattersiv2di:
+ case X86::BI__builtin_ia32_scattersiv4df:
+ case X86::BI__builtin_ia32_scattersiv4di:
+ case X86::BI__builtin_ia32_scattersiv4sf:
+ case X86::BI__builtin_ia32_scattersiv4si:
+ case X86::BI__builtin_ia32_scattersiv8sf:
+ case X86::BI__builtin_ia32_scattersiv8si: {
+    StringRef intrinsicName;
+ switch (builtinID) {
+ default:
+ llvm_unreachable("Unexpected builtin");
+ case X86::BI__builtin_ia32_scattersiv8df:
+ intrinsicName = "x86.avx512.mask.scatter.dpd.512";
+ break;
+ case X86::BI__builtin_ia32_scattersiv16sf:
+ intrinsicName = "x86.avx512.mask.scatter.dps.512";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv8df:
+ intrinsicName = "x86.avx512.mask.scatter.qpd.512";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv16sf:
+ intrinsicName = "x86.avx512.mask.scatter.qps.512";
+ break;
+ case X86::BI__builtin_ia32_scattersiv8di:
+ intrinsicName = "x86.avx512.mask.scatter.dpq.512";
+ break;
+ case X86::BI__builtin_ia32_scattersiv16si:
+ intrinsicName = "x86.avx512.mask.scatter.dpi.512";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv8di:
+ intrinsicName = "x86.avx512.mask.scatter.qpq.512";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv16si:
+ intrinsicName = "x86.avx512.mask.scatter.qpi.512";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv2df:
+ intrinsicName = "x86.avx512.mask.scatterdiv2.df";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv2di:
+ intrinsicName = "x86.avx512.mask.scatterdiv2.di";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv4df:
+ intrinsicName = "x86.avx512.mask.scatterdiv4.df";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv4di:
+ intrinsicName = "x86.avx512.mask.scatterdiv4.di";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv4sf:
+ intrinsicName = "x86.avx512.mask.scatterdiv4.sf";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv4si:
+ intrinsicName = "x86.avx512.mask.scatterdiv4.si";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv8sf:
+ intrinsicName = "x86.avx512.mask.scatterdiv8.sf";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv8si:
+ intrinsicName = "x86.avx512.mask.scatterdiv8.si";
+ break;
+ case X86::BI__builtin_ia32_scattersiv2df:
+ intrinsicName = "x86.avx512.mask.scattersiv2.df";
+ break;
+ case X86::BI__builtin_ia32_scattersiv2di:
+ intrinsicName = "x86.avx512.mask.scattersiv2.di";
+ break;
+ case X86::BI__builtin_ia32_scattersiv4df:
+ intrinsicName = "x86.avx512.mask.scattersiv4.df";
+ break;
+ case X86::BI__builtin_ia32_scattersiv4di:
+ intrinsicName = "x86.avx512.mask.scattersiv4.di";
+ break;
+ case X86::BI__builtin_ia32_scattersiv4sf:
+ intrinsicName = "x86.avx512.mask.scattersiv4.sf";
+ break;
+ case X86::BI__builtin_ia32_scattersiv4si:
+ intrinsicName = "x86.avx512.mask.scattersiv4.si";
+ break;
+ case X86::BI__builtin_ia32_scattersiv8sf:
+ intrinsicName = "x86.avx512.mask.scattersiv8.sf";
+ break;
+ case X86::BI__builtin_ia32_scattersiv8si:
+ intrinsicName = "x86.avx512.mask.scattersiv8.si";
+ break;
+ }
+
+ mlir::Location loc = getLoc(expr->getExprLoc());
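+    // For scatters the mask (ops[1]) is sized to the narrower of the index
+    // and value vectors.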
+ unsigned minElts =
+ std::min(cast<cir::VectorType>(ops[2].getType()).getSize(),
+ cast<cir::VectorType>(ops[3].getType()).getSize());
+ ops[1] = getMaskVecValue(builder, loc, ops[1], minElts);
+
+ return emitIntrinsicCallOp(builder, loc, intrinsicName,
+ convertType(expr->getType()), ops);
+ }
+ case X86::BI__builtin_ia32_vextractf128_pd256:
+ case X86::BI__builtin_ia32_vextractf128_ps256:
+ case X86::BI__builtin_ia32_vextractf128_si256:
+ case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_extractf64x4_mask:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extracti64x4_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extractf32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask:
+ case X86::BI__builtin_ia32_vinsertf128_pd256:
+ case X86::BI__builtin_ia32_vinsertf128_ps256:
+ case X86::BI__builtin_ia32_vinsertf128_si256:
+ case X86::BI__builtin_ia32_insert128i256:
+ case X86::BI__builtin_ia32_insertf64x4:
+ case X86::BI__builtin_ia32_insertf32x4:
+ case X86::BI__builtin_ia32_inserti64x4:
+ case X86::BI__builtin_ia32_inserti32x4:
+ case X86::BI__builtin_ia32_insertf32x8:
+ case X86::BI__builtin_ia32_inserti32x8:
+ case X86::BI__builtin_ia32_insertf32x4_256:
+ case X86::BI__builtin_ia32_inserti32x4_256:
+ case X86::BI__builtin_ia32_insertf64x2_256:
+ case X86::BI__builtin_ia32_inserti64x2_256:
+ case X86::BI__builtin_ia32_insertf64x2_512:
+ case X86::BI__builtin_ia32_inserti64x2_512:
+ case X86::BI__builtin_ia32_pmovqd512_mask:
+ case X86::BI__builtin_ia32_pmovwb512_mask:
+ case X86::BI__builtin_ia32_pblendw128:
+ case X86::BI__builtin_ia32_blendpd:
+ case X86::BI__builtin_ia32_blendps:
+ case X86::BI__builtin_ia32_blendpd256:
+ case X86::BI__builtin_ia32_blendps256:
+ case X86::BI__builtin_ia32_pblendw256:
+ case X86::BI__builtin_ia32_pblendd128:
+ case X86::BI__builtin_ia32_pblendd256:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ case X86::BI__builtin_ia32_pshuflw:
+ case X86::BI__builtin_ia32_pshuflw256:
+ case X86::BI__builtin_ia32_pshuflw512:
+ return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
+ true);
+ case X86::BI__builtin_ia32_pshufhw:
+ case X86::BI__builtin_ia32_pshufhw256:
+ case X86::BI__builtin_ia32_pshufhw512:
+ return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
+ false);
+ case X86::BI__builtin_ia32_pshufd:
+ case X86::BI__builtin_ia32_pshufd256:
+ case X86::BI__builtin_ia32_pshufd512:
+ case X86::BI__builtin_ia32_vpermilpd:
+ case X86::BI__builtin_ia32_vpermilps:
+ case X86::BI__builtin_ia32_vpermilpd256:
+ case X86::BI__builtin_ia32_vpermilps256:
+ case X86::BI__builtin_ia32_vpermilpd512:
+ case X86::BI__builtin_ia32_vpermilps512: {
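+    // These shuffles pick elements from a single source within each 128-bit
+    // lane; the helper expands the 8-bit immediate into a per-element
+    // shuffle mask.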
+ const uint32_t imm = getSExtIntValueFromConstOp(ops[1]);
+
+ llvm::SmallVector<int64_t, 16> mask(16);
+ computeFullLaneShuffleMask(*this, ops[0], imm, false, mask);
+
+ return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], mask);
+ }
+ case X86::BI__builtin_ia32_shufpd:
+ case X86::BI__builtin_ia32_shufpd256:
+ case X86::BI__builtin_ia32_shufpd512:
+ case X86::BI__builtin_ia32_shufps:
+ case X86::BI__builtin_ia32_shufps256:
+ case X86::BI__builtin_ia32_shufps512: {
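+    // shufps/shufpd interleave elements from two sources within each
+    // 128-bit lane, hence the two-operand vector shuffle below.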
+ const uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
+
+ llvm::SmallVector<int64_t, 16> mask(16);
+ computeFullLaneShuffleMask(*this, ops[0], imm, true, mask);
+
+ return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
+ mask);
+ }
+ case X86::BI__builtin_ia32_permdi256:
+ case X86::BI__builtin_ia32_permdf256:
+ case X86::BI__builtin_ia32_permdi512:
+ case X86::BI__builtin_ia32_permdf512:
+ case X86::BI__builtin_ia32_palignr128:
+ case X86::BI__builtin_ia32_palignr256:
+ case X86::BI__builtin_ia32_palignr512:
+ case X86::BI__builtin_ia32_alignd128:
+ case X86::BI__builtin_ia32_alignd256:
+ case X86::BI__builtin_ia32_alignd512:
+ case X86::BI__builtin_ia32_alignq128:
+ case X86::BI__builtin_ia32_alignq256:
+ case X86::BI__builtin_ia32_alignq512:
+ case X86::BI__builtin_ia32_shuf_f32x4_256:
+ case X86::BI__builtin_ia32_shuf_f64x2_256:
+ case X86::BI__builtin_ia32_shuf_i32x4_256:
+ case X86::BI__builtin_ia32_shuf_i64x2_256:
+ case X86::BI__builtin_ia32_shuf_f32x4:
+ case X86::BI__builtin_ia32_shuf_f64x2:
+ case X86::BI__builtin_ia32_shuf_i32x4:
+ case X86::BI__builtin_ia32_shuf_i64x2:
+ case X86::BI__builtin_ia32_vperm2f128_pd256:
+ case X86::BI__builtin_ia32_vperm2f128_ps256:
+ case X86::BI__builtin_ia32_vperm2f128_si256:
+ case X86::BI__builtin_ia32_permti256:
+ case X86::BI__builtin_ia32_pslldqi128_byteshift:
+ case X86::BI__builtin_ia32_pslldqi256_byteshift:
+ case X86::BI__builtin_ia32_pslldqi512_byteshift:
+ case X86::BI__builtin_ia32_psrldqi128_byteshift:
+ case X86::BI__builtin_ia32_psrldqi256_byteshift:
+ case X86::BI__builtin_ia32_psrldqi512_byteshift:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ case X86::BI__builtin_ia32_kshiftliqi:
+ case X86::BI__builtin_ia32_kshiftlihi:
+ case X86::BI__builtin_ia32_kshiftlisi:
+ case X86::BI__builtin_ia32_kshiftlidi: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+    unsigned shiftVal = getZExtIntValueFromConstOp(ops[1]) & 0xff;
+ unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
+
+ if (shiftVal >= numElems)
+ return builder.getNullValue(ops[0].getType(), loc);
+
+ mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
+
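+    // Model the left shift as a shuffle over the concatenation [zero, in]:
+    // lane i reads concat[numElems + i - shiftVal], i.e. in[i - shiftVal]
+    // once i >= shiftVal and zero below that.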
+ SmallVector<mlir::Attribute, 64> indices;
+ mlir::Type i32Ty = builder.getSInt32Ty();
+ for (auto i : llvm::seq<unsigned>(0, numElems))
+ indices.push_back(cir::IntAttr::get(i32Ty, numElems + i - shiftVal));
+
+ mlir::Value zero = builder.getNullValue(in.getType(), loc);
+ mlir::Value sv = builder.createVecShuffle(loc, zero, in, indices);
+ return builder.createBitcast(sv, ops[0].getType());
+ }
+ case X86::BI__builtin_ia32_kshiftriqi:
+ case X86::BI__builtin_ia32_kshiftrihi:
+ case X86::BI__builtin_ia32_kshiftrisi:
+ case X86::BI__builtin_ia32_kshiftridi: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+    unsigned shiftVal = getZExtIntValueFromConstOp(ops[1]) & 0xff;
+ unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
+
+ if (shiftVal >= numElems)
+ return builder.getNullValue(ops[0].getType(), loc);
+
+ mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
+
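+    // Model the right shift as a shuffle over [in, zero]: lane i reads
+    // concat[i + shiftVal], i.e. in[i + shiftVal] until that runs off the
+    // end of the input and zero afterwards.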
+ SmallVector<mlir::Attribute, 64> indices;
+ mlir::Type i32Ty = builder.getSInt32Ty();
+ for (auto i : llvm::seq<unsigned>(0, numElems))
+ indices.push_back(cir::IntAttr::get(i32Ty, i + shiftVal));
+
+ mlir::Value zero = builder.getNullValue(in.getType(), loc);
+ mlir::Value sv = builder.createVecShuffle(loc, in, zero, indices);
+ return builder.createBitcast(sv, ops[0].getType());
+ }
+ case X86::BI__builtin_ia32_vprotbi:
+ case X86::BI__builtin_ia32_vprotwi:
+ case X86::BI__builtin_ia32_vprotdi:
+ case X86::BI__builtin_ia32_vprotqi:
+ case X86::BI__builtin_ia32_prold128:
+ case X86::BI__builtin_ia32_prold256:
+ case X86::BI__builtin_ia32_prold512:
+ case X86::BI__builtin_ia32_prolq128:
+ case X86::BI__builtin_ia32_prolq256:
+ case X86::BI__builtin_ia32_prolq512:
+ return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
+ ops[0], ops[1], false);
+ case X86::BI__builtin_ia32_prord128:
+ case X86::BI__builtin_ia32_prord256:
+ case X86::BI__builtin_ia32_prord512:
+ case X86::BI__builtin_ia32_prorq128:
+ case X86::BI__builtin_ia32_prorq256:
+ case X86::BI__builtin_ia32_prorq512:
+ return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
+ ops[0], ops[1], true);
+ case X86::BI__builtin_ia32_selectb_128:
+ case X86::BI__builtin_ia32_selectb_256:
+ case X86::BI__builtin_ia32_selectb_512:
+ case X86::BI__builtin_ia32_selectw_128:
+ case X86::BI__builtin_ia32_selectw_256:
+ case X86::BI__builtin_ia32_selectw_512:
+ case X86::BI__builtin_ia32_selectd_128:
+ case X86::BI__builtin_ia32_selectd_256:
+ case X86::BI__builtin_ia32_selectd_512:
+ case X86::BI__builtin_ia32_selectq_128:
+ case X86::BI__builtin_ia32_selectq_256:
+ case X86::BI__builtin_ia32_selectq_512:
+ case X86::BI__builtin_ia32_selectph_128:
+ case X86::BI__builtin_ia32_selectph_256:
+ case X86::BI__builtin_ia32_selectph_512:
+ case X86::BI__builtin_ia32_selectpbf_128:
+ case X86::BI__builtin_ia32_selectpbf_256:
+ case X86::BI__builtin_ia32_selectpbf_512:
+ case X86::BI__builtin_ia32_selectps_128:
+ case X86::BI__builtin_ia32_selectps_256:
+ case X86::BI__builtin_ia32_selectps_512:
+ case X86::BI__builtin_ia32_selectpd_128:
+ case X86::BI__builtin_ia32_selectpd_256:
+ case X86::BI__builtin_ia32_selectpd_512:
+ case X86::BI__builtin_ia32_selectsh_128:
+ case X86::BI__builtin_ia32_selectsbf_128:
+ case X86::BI__builtin_ia32_selectss_128:
+ case X86::BI__builtin_ia32_selectsd_128:
+ case X86::BI__builtin_ia32_cmpb128_mask:
+ case X86::BI__builtin_ia32_cmpb256_mask:
+ case X86::BI__builtin_ia32_cmpb512_mask:
+ case X86::BI__builtin_ia32_cmpw128_mask:
+ case X86::BI__builtin_ia32_cmpw256_mask:
+ case X86::BI__builtin_ia32_cmpw512_mask:
+ case X86::BI__builtin_ia32_cmpd128_mask:
+ case X86::BI__builtin_ia32_cmpd256_mask:
+ case X86::BI__builtin_ia32_cmpd512_mask:
+ case X86::BI__builtin_ia32_cmpq128_mask:
+ case X86::BI__builtin_ia32_cmpq256_mask:
+ case X86::BI__builtin_ia32_cmpq512_mask:
+ case X86::BI__builtin_ia32_ucmpb128_mask:
+ case X86::BI__builtin_ia32_ucmpb256_mask:
+ case X86::BI__builtin_ia32_ucmpb512_mask:
+ case X86::BI__builtin_ia32_ucmpw128_mask:
+ case X86::BI__builtin_ia32_ucmpw256_mask:
+ case X86::BI__builtin_ia32_ucmpw512_mask:
+ case X86::BI__builtin_ia32_ucmpd128_mask:
+ case X86::BI__builtin_ia32_ucmpd256_mask:
+ case X86::BI__builtin_ia32_ucmpd512_mask:
+ case X86::BI__builtin_ia32_ucmpq128_mask:
+ case X86::BI__builtin_ia32_ucmpq256_mask:
+ case X86::BI__builtin_ia32_ucmpq512_mask:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ case X86::BI__builtin_ia32_vpcomb:
+ case X86::BI__builtin_ia32_vpcomw:
+ case X86::BI__builtin_ia32_vpcomd:
+ case X86::BI__builtin_ia32_vpcomq:
+ return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
+ case X86::BI__builtin_ia32_vpcomub:
+ case X86::BI__builtin_ia32_vpcomuw:
+ case X86::BI__builtin_ia32_vpcomud:
+ case X86::BI__builtin_ia32_vpcomuq:
+ return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
+ case X86::BI__builtin_ia32_kortestcqi:
+ case X86::BI__builtin_ia32_kortestchi:
+ case X86::BI__builtin_ia32_kortestcsi:
+ case X86::BI__builtin_ia32_kortestcdi: {
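+    // kortestc sets its result when the OR of the two masks is all ones,
+    // mirroring the instruction's carry-flag semantics.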
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ cir::IntType ty = cast<cir::IntType>(ops[0].getType());
+ mlir::Value allOnesOp =
+ builder.getConstAPInt(loc, ty, APInt::getAllOnes(ty.getWidth()));
+ mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
+ mlir::Value cmp =
+ cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp);
+ return builder.createCast(cir::CastKind::bool_to_int, cmp,
+ cgm.convertType(expr->getType()));
+ }
+ case X86::BI__builtin_ia32_kortestzqi:
+ case X86::BI__builtin_ia32_kortestzhi:
+ case X86::BI__builtin_ia32_kortestzsi:
+ case X86::BI__builtin_ia32_kortestzdi: {
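+    // kortestz sets its result when the OR of the two masks is zero,
+    // mirroring the instruction's zero-flag semantics.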
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ cir::IntType ty = cast<cir::IntType>(ops[0].getType());
+    mlir::Value allZerosOp = builder.getNullValue(ty, loc);
+ mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
+ mlir::Value cmp =
+ cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp);
+ return builder.createCast(cir::CastKind::bool_to_int, cmp,
+ cgm.convertType(expr->getType()));
+ }
+ case X86::BI__builtin_ia32_ktestcqi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestc.b", ops);
+ case X86::BI__builtin_ia32_ktestzqi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestz.b", ops);
+ case X86::BI__builtin_ia32_ktestchi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestc.w", ops);
+ case X86::BI__builtin_ia32_ktestzhi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestz.w", ops);
+ case X86::BI__builtin_ia32_ktestcsi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestc.d", ops);
+ case X86::BI__builtin_ia32_ktestzsi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestz.d", ops);
+ case X86::BI__builtin_ia32_ktestcdi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestc.q", ops);
+ case X86::BI__builtin_ia32_ktestzdi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestz.q", ops);
+ case X86::BI__builtin_ia32_kaddqi:
+ return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kadd.b", ops);
+ case X86::BI__builtin_ia32_kaddhi:
+ return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kadd.w", ops);
+ case X86::BI__builtin_ia32_kaddsi:
+ return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kadd.d", ops);
+ case X86::BI__builtin_ia32_kadddi:
+ return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kadd.q", ops);
+ case X86::BI__builtin_ia32_kandqi:
+ case X86::BI__builtin_ia32_kandhi:
+ case X86::BI__builtin_ia32_kandsi:
+ case X86::BI__builtin_ia32_kanddi:
+ return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+ cir::BinOpKind::And, ops);
+ case X86::BI__builtin_ia32_kandnqi:
+ case X86::BI__builtin_ia32_kandnhi:
+ case X86::BI__builtin_ia32_kandnsi:
+ case X86::BI__builtin_ia32_kandndi:
+ return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+ cir::BinOpKind::And, ops, true);
+ case X86::BI__builtin_ia32_korqi:
+ case X86::BI__builtin_ia32_korhi:
+ case X86::BI__builtin_ia32_korsi:
+ case X86::BI__builtin_ia32_kordi:
+ return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+ cir::BinOpKind::Or, ops);
+ case X86::BI__builtin_ia32_kxnorqi:
+ case X86::BI__builtin_ia32_kxnorhi:
+ case X86::BI__builtin_ia32_kxnorsi:
+ case X86::BI__builtin_ia32_kxnordi:
+ return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+ cir::BinOpKind::Xor, ops, true);
+ case X86::BI__builtin_ia32_kxorqi:
+ case X86::BI__builtin_ia32_kxorhi:
+ case X86::BI__builtin_ia32_kxorsi:
+ case X86::BI__builtin_ia32_kxordi:
+ return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+ cir::BinOpKind::Xor, ops);
+ case X86::BI__builtin_ia32_knotqi:
+ case X86::BI__builtin_ia32_knothi:
+ case X86::BI__builtin_ia32_knotsi:
+ case X86::BI__builtin_ia32_knotdi: {
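+    // Materialize the mask as a vector of i1, flip every bit, and bitcast
+    // the result back to the integer mask type.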
+ cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
+ unsigned numElts = intTy.getWidth();
+ mlir::Value resVec =
+ getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
+ return builder.createBitcast(builder.createNot(resVec), ops[0].getType());
+ }
+ case X86::BI__builtin_ia32_kmovb:
+ case X86::BI__builtin_ia32_kmovw:
+ case X86::BI__builtin_ia32_kmovd:
+ case X86::BI__builtin_ia32_kmovq: {
+ // Bitcast to vXi1 type and then back to integer. This gets the mask
+ // register type into the IR, but might be optimized out depending on
+ // what's around it.
+ cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
+ unsigned numElts = intTy.getWidth();
+ mlir::Value resVec =
+ getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
+ return builder.createBitcast(resVec, ops[0].getType());
+ }
+ case X86::BI__builtin_ia32_sqrtsh_round_mask:
+ case X86::BI__builtin_ia32_sqrtsd_round_mask:
+ case X86::BI__builtin_ia32_sqrtss_round_mask:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ case X86::BI__builtin_ia32_sqrtph512:
+ case X86::BI__builtin_ia32_sqrtps512:
+ case X86::BI__builtin_ia32_sqrtpd512: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ mlir::Value arg = ops[0];
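+    // Note: ops[1] carries the rounding mode for these builtins; this
+    // lowering assumes the default _MM_FROUND_CUR_DIRECTION, where a plain
+    // cir.sqrt is sufficient.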
+ return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
+ }
+ case X86::BI__builtin_ia32_pmuludq128:
+ case X86::BI__builtin_ia32_pmuludq256:
+ case X86::BI__builtin_ia32_pmuludq512: {
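+    // pmuludq multiplies the low 32 bits of each 64-bit element as
+    // unsigned; the helper needs the full vector width to rebuild the
+    // 64-bit element type.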
+ unsigned opTypePrimitiveSizeInBits =
+ cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
+    return emitX86Muldq(builder, getLoc(expr->getExprLoc()),
+                        /*isSigned=*/false, ops, opTypePrimitiveSizeInBits);
+ }
+ case X86::BI__builtin_ia32_pmuldq128:
+ case X86::BI__builtin_ia32_pmuldq256:
+ case X86::BI__builtin_ia32_pmuldq512: {
+ unsigned opTypePrimitiveSizeInBits =
+ cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
+    return emitX86Muldq(builder, getLoc(expr->getExprLoc()),
+                        /*isSigned=*/true, ops, opTypePrimitiveSizeInBits);
+ }
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask:
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz:
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ case X86::BI__builtin_ia32_vpshldd128:
+ case X86::BI__builtin_ia32_vpshldd256:
+ case X86::BI__builtin_ia32_vpshldd512:
+ case X86::BI__builtin_ia32_vpshldq128:
+ case X86::BI__builtin_ia32_vpshldq256:
+ case X86::BI__builtin_ia32_vpshldq512:
+ case X86::BI__builtin_ia32_vpshldw128:
+ case X86::BI__builtin_ia32_vpshldw256:
+ case X86::BI__builtin_ia32_vpshldw512:
+ case X86::BI__builtin_ia32_vpshrdd128:
+ case X86::BI__builtin_ia32_vpshrdd256:
+ case X86::BI__builtin_ia32_vpshrdd512:
+ case X86::BI__builtin_ia32_vpshrdq128:
+ case X86::BI__builtin_ia32_vpshrdq256:
+ case X86::BI__builtin_ia32_vpshrdq512:
+ case X86::BI__builtin_ia32_vpshrdw128:
+ case X86::BI__builtin_ia32_vpshrdw256:
+ case X86::BI__builtin_ia32_vpshrdw512:
+ case X86::BI__builtin_ia32_reduce_fadd_pd512:
+ case X86::BI__builtin_ia32_reduce_fadd_ps512:
+ case X86::BI__builtin_ia32_reduce_fadd_ph512:
+ case X86::BI__builtin_ia32_reduce_fadd_ph256:
+ case X86::BI__builtin_ia32_reduce_fadd_ph128:
+ case X86::BI__builtin_ia32_reduce_fmul_pd512:
+ case X86::BI__builtin_ia32_reduce_fmul_ps512:
+ case X86::BI__builtin_ia32_reduce_fmul_ph512:
+ case X86::BI__builtin_ia32_reduce_fmul_ph256:
+ case X86::BI__builtin_ia32_reduce_fmul_ph128:
+ case X86::BI__builtin_ia32_reduce_fmax_pd512:
+ case X86::BI__builtin_ia32_reduce_fmax_ps512:
+ case X86::BI__builtin_ia32_reduce_fmax_ph512:
+ case X86::BI__builtin_ia32_reduce_fmax_ph256:
+ case X86::BI__builtin_ia32_reduce_fmax_ph128:
+ case X86::BI__builtin_ia32_reduce_fmin_pd512:
+ case X86::BI__builtin_ia32_reduce_fmin_ps512:
+ case X86::BI__builtin_ia32_reduce_fmin_ph512:
+ case X86::BI__builtin_ia32_reduce_fmin_ph256:
+ case X86::BI__builtin_ia32_reduce_fmin_ph128:
+ case X86::BI__builtin_ia32_rdrand16_step:
+ case X86::BI__builtin_ia32_rdrand32_step:
+ case X86::BI__builtin_ia32_rdrand64_step:
+ case X86::BI__builtin_ia32_rdseed16_step:
+ case X86::BI__builtin_ia32_rdseed32_step:
+ case X86::BI__builtin_ia32_rdseed64_step:
+ case X86::BI__builtin_ia32_addcarryx_u32:
+ case X86::BI__builtin_ia32_addcarryx_u64:
+ case X86::BI__builtin_ia32_subborrow_u32:
+ case X86::BI__builtin_ia32_subborrow_u64:
+ case X86::BI__builtin_ia32_fpclassps128_mask:
+ case X86::BI__builtin_ia32_fpclassps256_mask:
+ case X86::BI__builtin_ia32_fpclassps512_mask:
+ case X86::BI__builtin_ia32_vfpclassbf16128_mask:
+ case X86::BI__builtin_ia32_vfpclassbf16256_mask:
+ case X86::BI__builtin_ia32_vfpclassbf16512_mask:
+ case X86::BI__builtin_ia32_fpclassph128_mask:
+ case X86::BI__builtin_ia32_fpclassph256_mask:
+ case X86::BI__builtin_ia32_fpclassph512_mask:
+ case X86::BI__builtin_ia32_fpclasspd128_mask:
+ case X86::BI__builtin_ia32_fpclasspd256_mask:
+ case X86::BI__builtin_ia32_fpclasspd512_mask:
+ case X86::BI__builtin_ia32_vp2intersect_q_512:
+ case X86::BI__builtin_ia32_vp2intersect_q_256:
+ case X86::BI__builtin_ia32_vp2intersect_q_128:
+ case X86::BI__builtin_ia32_vp2intersect_d_512:
+ case X86::BI__builtin_ia32_vp2intersect_d_256:
+ case X86::BI__builtin_ia32_vp2intersect_d_128:
+ case X86::BI__builtin_ia32_vpmultishiftqb128:
+ case X86::BI__builtin_ia32_vpmultishiftqb256:
+ case X86::BI__builtin_ia32_vpmultishiftqb512:
+ case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
+ case X86::BI__builtin_ia32_cmpeqps:
+ case X86::BI__builtin_ia32_cmpeqpd:
+ case X86::BI__builtin_ia32_cmpltps:
+ case X86::BI__builtin_ia32_cmpltpd:
+ case X86::BI__builtin_ia32_cmpleps:
+ case X86::BI__builtin_ia32_cmplepd:
+ case X86::BI__builtin_ia32_cmpunordps:
+ case X86::BI__builtin_ia32_cmpunordpd:
+ case X86::BI__builtin_ia32_cmpneqps:
+ case X86::BI__builtin_ia32_cmpneqpd:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ case X86::BI__builtin_ia32_cmpnltps:
+ case X86::BI__builtin_ia32_cmpnltpd:
+ return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
+ cir::CmpOpKind::lt, /*shouldInvert=*/true);
+ case X86::BI__builtin_ia32_cmpnleps:
+ case X86::BI__builtin_ia32_cmpnlepd:
+ return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
+ cir::CmpOpKind::le, /*shouldInvert=*/true);
+ case X86::BI__builtin_ia32_cmpordps:
+ case X86::BI__builtin_ia32_cmpordpd:
+ case X86::BI__builtin_ia32_cmpph128_mask:
+ case X86::BI__builtin_ia32_cmpph256_mask:
+ case X86::BI__builtin_ia32_cmpph512_mask:
+ case X86::BI__builtin_ia32_cmpps128_mask:
+ case X86::BI__builtin_ia32_cmpps256_mask:
+ case X86::BI__builtin_ia32_cmpps512_mask:
+ case X86::BI__builtin_ia32_cmppd128_mask:
+ case X86::BI__builtin_ia32_cmppd256_mask:
+ case X86::BI__builtin_ia32_cmppd512_mask:
+ case X86::BI__builtin_ia32_vcmpbf16512_mask:
+ case X86::BI__builtin_ia32_vcmpbf16256_mask:
+ case X86::BI__builtin_ia32_vcmpbf16128_mask:
+ case X86::BI__builtin_ia32_cmpps:
+ case X86::BI__builtin_ia32_cmpps256:
+ case X86::BI__builtin_ia32_cmppd:
+ case X86::BI__builtin_ia32_cmppd256:
+ case X86::BI__builtin_ia32_cmpeqss:
+ case X86::BI__builtin_ia32_cmpltss:
+ case X86::BI__builtin_ia32_cmpless:
+ case X86::BI__builtin_ia32_cmpunordss:
+ case X86::BI__builtin_ia32_cmpneqss:
+ case X86::BI__builtin_ia32_cmpnltss:
+ case X86::BI__builtin_ia32_cmpnless:
+ case X86::BI__builtin_ia32_cmpordss:
+ case X86::BI__builtin_ia32_cmpeqsd:
+ case X86::BI__builtin_ia32_cmpltsd:
+ case X86::BI__builtin_ia32_cmplesd:
+ case X86::BI__builtin_ia32_cmpunordsd:
+ case X86::BI__builtin_ia32_cmpneqsd:
+ case X86::BI__builtin_ia32_cmpnltsd:
+ case X86::BI__builtin_ia32_cmpnlesd:
+ case X86::BI__builtin_ia32_cmpordsd:
+ case X86::BI__builtin_ia32_vcvtph2ps_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps256_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+ case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
+ case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
+ case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
+ case X86::BI__cpuid:
+ case X86::BI__cpuidex:
+ case X86::BI__emul:
+ case X86::BI__emulu:
+ case X86::BI__mulh:
+ case X86::BI__umulh:
+ case X86::BI_mul128:
+ case X86::BI_umul128:
+ case X86::BI__faststorefence:
+ case X86::BI__shiftleft128:
+ case X86::BI__shiftright128:
+ case X86::BI_ReadWriteBarrier:
+ case X86::BI_ReadBarrier:
+ case X86::BI_WriteBarrier:
+ case X86::BI_AddressOfReturnAddress:
+ case X86::BI__stosb:
+ case X86::BI__ud2:
+ case X86::BI__int2c:
+ case X86::BI__readfsbyte:
+ case X86::BI__readfsword:
+ case X86::BI__readfsdword:
+ case X86::BI__readfsqword:
+ case X86::BI__readgsbyte:
+ case X86::BI__readgsword:
+ case X86::BI__readgsdword:
+ case X86::BI__readgsqword:
+ case X86::BI__builtin_ia32_encodekey128_u32:
+ case X86::BI__builtin_ia32_encodekey256_u32:
+ case X86::BI__builtin_ia32_aesenc128kl_u8:
+ case X86::BI__builtin_ia32_aesdec128kl_u8:
+ case X86::BI__builtin_ia32_aesenc256kl_u8:
+ case X86::BI__builtin_ia32_aesdec256kl_u8:
+ case X86::BI__builtin_ia32_aesencwide128kl_u8:
+ case X86::BI__builtin_ia32_aesdecwide128kl_u8:
+ case X86::BI__builtin_ia32_aesencwide256kl_u8:
+ case X86::BI__builtin_ia32_aesdecwide256kl_u8:
+ case X86::BI__builtin_ia32_vfcmaddcph512_mask:
+ case X86::BI__builtin_ia32_vfmaddcph512_mask:
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
+ case X86::BI__builtin_ia32_prefetchi:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ }
+}
diff --git a/my-sqrt-changes.patch b/my-sqrt-changes.patch
deleted file mode 100644
index 87c0ca69ac8abe6aaa684ffbbce3c65e342f6066..0000000000000000000000000000000000000000
>From 47f9b2ffd8a1893c2929b003811ba0716883c8a0 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Sun, 7 Dec 2025 17:21:57 +0530
Subject: [PATCH 30/32] update
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index d2ffb9d98d6d0..228da428844e9 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -186,7 +186,7 @@ mlir::LogicalResult CIRToLLVMCopyOpLowering::matchAndRewrite(
return mlir::success();
}
-mlir::LogicalResult SqrtOpLowering::matchAndRewrite(
+mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
cir::SqrtOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
mlir::Type resTy = typeConverter->convertType(op.getType());
>From 15f1f4f11204e308fd05739f21449df91b69da60 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Sun, 7 Dec 2025 21:52:03 +0530
Subject: [PATCH 31/32] update clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
---
.../test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 88 +++++++++----------
1 file changed, 44 insertions(+), 44 deletions(-)
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
index a3de192f9e142..d540e9c227e67 100644
--- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -1,45 +1,45 @@
-#include <immintrin.h>
-// Test X86-specific sqrt builtins
-
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
-// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
-
-// Test __builtin_ia32_sqrtph512
-__m512h test_sqrtph512(__m512h a) {
- return __builtin_ia32_sqrtph512(a);
-}
-// CIR: cir.func @test_sqrtph512
-// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.fp16 x 32>
-// CIR: cir.return [[RES]]
-// LLVM: define {{.*}} @test_sqrtph512
-// LLVM: call <32 x half> @llvm.sqrt.v32f16
-// OGCG: define {{.*}} @test_sqrtph512
-// OGCG: call <32 x half> @llvm.sqrt.v32f16
-
-// Test __builtin_ia32_sqrtps512
-__m512 test_sqrtps512(__m512 a) {
- return __builtin_ia32_sqrtps512(a);
-}
-// CIR: cir.func @test_sqrtps512
-// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.float x 16>
-// CIR: cir.return [[RES]]
-// LLVM: define {{.*}} @test_sqrtps512
-// LLVM: call <16 x float> @llvm.sqrt.v16f32
-// OGCG: define {{.*}} @test_sqrtps512
-// OGCG: call <16 x float> @llvm.sqrt.v16f32
-
-// Test __builtin_ia32_sqrtpd512
-__m512d test_sqrtpd512(__m512d a) {
- return __builtin_ia32_sqrtpd512(a);
-}
-// CIR: cir.func @test_sqrtpd512
-// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.double x 8>
-// CIR: cir.return [[RES]]
-// LLVM: define {{.*}} @test_sqrtpd512
-// LLVM: call <8 x double> @llvm.sqrt.v8f64
-// OGCG: define {{.*}} @test_sqrtpd512
+// Test X86-specific sqrt builtins
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +avx512f -target-feature +avx512fp16 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
+typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
+typedef _Float16 __m512h __attribute__((__vector_size__(64), __aligned__(64)));
+
+// Test __builtin_ia32_sqrtph512
+__m512h test_sqrtph512(__m512h a) {
+ return __builtin_ia32_sqrtph512(a, 4);
+}
+// CIR-LABEL: cir.func {{.*}}@test_sqrtph512
+// CIR: cir.sqrt {{%.*}} : !cir.vector<32 x !cir.f16>
+// LLVM-LABEL: define {{.*}} @test_sqrtph512
+// LLVM: call <32 x half> @llvm.sqrt.v32f16
+// OGCG-LABEL: define {{.*}} @test_sqrtph512
+// OGCG: call <32 x half> @llvm.sqrt.v32f16
+
+// Test __builtin_ia32_sqrtps512
+__m512 test_sqrtps512(__m512 a) {
+ return __builtin_ia32_sqrtps512(a, 4);
+}
+// CIR-LABEL: cir.func {{.*}}@test_sqrtps512
+// CIR: cir.sqrt {{%.*}} : !cir.vector<16 x !cir.float>
+// LLVM-LABEL: define {{.*}} @test_sqrtps512
+// LLVM: call <16 x float> @llvm.sqrt.v16f32
+// OGCG-LABEL: define {{.*}} @test_sqrtps512
+// OGCG: call <16 x float> @llvm.sqrt.v16f32
+
+// Test __builtin_ia32_sqrtpd512
+__m512d test_sqrtpd512(__m512d a) {
+ return __builtin_ia32_sqrtpd512(a, 4);
+}
+// CIR-LABEL: cir.func {{.*}}@test_sqrtpd512
+// CIR: cir.sqrt {{%.*}} : !cir.vector<8 x !cir.double>
+// LLVM-LABEL: define {{.*}} @test_sqrtpd512
+// LLVM: call <8 x double> @llvm.sqrt.v8f64
+// OGCG-LABEL: define {{.*}} @test_sqrtpd512
// OGCG: call <8 x double> @llvm.sqrt.v8f64
\ No newline at end of file
>From b12779ab57b19b35a6affc361f3882574bbdddfd Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Tue, 9 Dec 2025 02:15:00 +0530
Subject: [PATCH 32/32] Fix line endings in CIRGenBuiltinX86.cpp
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 3142 ++++++++++----------
1 file changed, 1571 insertions(+), 1571 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 080a696b868cf..fb17e31bf36d6 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1,1571 +1,1571 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This contains code to emit x86/x86_64 Builtin calls as CIR or a function
-// call to be later resolved.
-//
-//===----------------------------------------------------------------------===//
-
-#include "CIRGenBuilder.h"
-#include "CIRGenFunction.h"
-#include "CIRGenModule.h"
-#include "mlir/IR/Location.h"
-#include "mlir/IR/ValueRange.h"
-#include "clang/Basic/Builtins.h"
-#include "clang/Basic/TargetBuiltins.h"
-#include "clang/CIR/Dialect/IR/CIRTypes.h"
-#include "clang/CIR/MissingFeatures.h"
-
-using namespace clang;
-using namespace clang::CIRGen;
-
-template <typename... Operands>
-static mlir::Value emitIntrinsicCallOp(CIRGenBuilderTy &builder,
- mlir::Location loc, const StringRef str,
- const mlir::Type &resTy,
- Operands &&...op) {
- return cir::LLVMIntrinsicCallOp::create(builder, loc,
- builder.getStringAttr(str), resTy,
- std::forward<Operands>(op)...)
- .getResult();
-}
-
-// OG supports unordered comparisons as a form of optimization in addition to
-// ordered comparisons, while CIR doesn't.
-//
-// This means that we can't encode a comparison code such as UGT (unordered
-// greater than), at least not at the CIR level.
-//
-// The boolean shouldInvert compensates for this.
-// For example: to get the comparison code UGT, we pass in
-// emitVectorFCmp(OLE, shouldInvert = true), since OLE is the inverse of UGT.
-
-// There are several ways to support this otherwise:
-// - register extra CmpOpKind for unordered comparison types and build the
-//   translation code to go from CIR -> LLVM dialect. Notice we get this
-//   naturally with shouldInvert, benefiting from existing infrastructure,
-//   albeit having to generate an extra `not` at the CIR level.
-// - Just add extra comparison code to a new VecCmpOpKind instead of
-// cluttering CmpOpKind.
-// - Add a boolean in VecCmpOp to indicate if it's doing unordered or ordered
-// comparison
-// - Just emit the intrinsics call instead of calling this helper, see how the
-// LLVM lowering handles this.
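-//
-// A usage sketch (values assumed for illustration): lowering a UGT-style
-// builtin would call
-//   emitVectorFCmp(builder, ops, loc, cir::CmpOpKind::le, /*shouldInvert=*/true)
-// which emits an `le` vector compare followed by a `not` of the result.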
-static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
- llvm::SmallVector<mlir::Value> &ops,
- mlir::Location loc, cir::CmpOpKind pred,
- bool shouldInvert) {
- assert(!cir::MissingFeatures::cgFPOptionsRAII());
- // TODO(cir): Add isSignaling boolean once emitConstrainedFPCall implemented
- assert(!cir::MissingFeatures::emitConstrainedFPCall());
- mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]);
- mlir::Value bitCast = builder.createBitcast(
- shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType());
- return bitCast;
-}
-
-static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc,
- mlir::Value mask, unsigned numElems) {
- auto maskTy = cir::VectorType::get(
- builder.getUIntNTy(1), cast<cir::IntType>(mask.getType()).getWidth());
- mlir::Value maskVec = builder.createBitcast(mask, maskTy);
-
-  // If we have fewer than 8 elements, then the starting mask was an i8 and
- // we need to extract down to the right number of elements.
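-  // For example (values assumed for illustration): an i8 mask with
-  // numElems == 4 is bitcast to a vector of 8 x i1 and then shuffled with
-  // indices [0, 1, 2, 3] down to a 4-element mask vector.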
- if (numElems < 8) {
- SmallVector<mlir::Attribute, 4> indices;
- mlir::Type i32Ty = builder.getSInt32Ty();
- for (auto i : llvm::seq<unsigned>(0, numElems))
- indices.push_back(cir::IntAttr::get(i32Ty, i));
-
- maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices);
- }
- return maskVec;
-}
-
-// Builds the VecShuffleOp for pshuflw and pshufhw x86 builtins.
-//
-// The vector is split into 128-bit lanes of 8 word elements (16 bits each).
-// The lower or upper half of each lane, selected by `isLow`, is shuffled in
-// the following way: the immediate is truncated to 8 bits and split into 4
-// 2-bit fields. The i-th field's value gives the source index of the i-th
-// element of the half lane after shuffling. The other half of the lane
-// remains unchanged.
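-//
-// For example (immediate assumed for illustration): pshuflw with imm 0x1B
-// (0b00011011) rewrites the low half of each lane to take its elements from
-// source indices [3, 2, 1, 0], while elements 4..7 of each lane pass through.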
-static cir::VecShuffleOp emitPshufWord(CIRGenBuilderTy &builder,
- const mlir::Value vec,
- const mlir::Value immediate,
- const mlir::Location loc,
- const bool isLow) {
- uint32_t imm = CIRGenFunction::getZExtIntValueFromConstOp(immediate);
-
- auto vecTy = cast<cir::VectorType>(vec.getType());
- unsigned numElts = vecTy.getSize();
-
- unsigned firstHalfStart = isLow ? 0 : 4;
- unsigned secondHalfStart = 4 - firstHalfStart;
-
-  // Splat the low 8 bits of the immediate 4 times to help the loop wrap around.
- imm = (imm & 0xff) * 0x01010101;
-
- int64_t indices[32];
- for (unsigned l = 0; l != numElts; l += 8) {
- for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) {
- indices[l + i] = l + (imm & 3) + firstHalfStart;
- imm >>= 2;
- }
- for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i)
- indices[l + i] = l + i;
- }
-
- return builder.createVecShuffle(loc, vec, ArrayRef(indices, numElts));
-}
-
-// Builds the shuffle mask for pshufd and shufpd/shufps x86 builtins.
-// The shuffle mask is written to outIndices.
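-//
-// For example (immediate assumed for illustration): shufps with imm 0x4E
-// (0b01001110) decodes per-lane fields [2, 3, 0, 1]; because isShufP is
-// true, the last two fields select from the second vector, yielding indices
-// [2, 3, 4, 5] for a 4-element vector.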
-static void
-computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec,
- uint32_t imm, const bool isShufP,
- llvm::SmallVectorImpl<int64_t> &outIndices) {
- auto vecTy = cast<cir::VectorType>(vec.getType());
- unsigned numElts = vecTy.getSize();
- unsigned numLanes = cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128;
- unsigned numLaneElts = numElts / numLanes;
-
-  // Splat the low 8 bits of the immediate 4 times to help the loop wrap around.
- imm = (imm & 0xff) * 0x01010101;
-
- for (unsigned l = 0; l != numElts; l += numLaneElts) {
- for (unsigned i = 0; i != numLaneElts; ++i) {
- uint32_t idx = imm % numLaneElts;
- imm /= numLaneElts;
- if (isShufP && i >= (numLaneElts / 2))
- idx += numElts;
- outIndices[l + i] = l + idx;
- }
- }
-
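-  // Callers pre-size outIndices; shrink it to the actual element count.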
- outIndices.resize(numElts);
-}
-
-static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
- mlir::Location loc,
- const std::string &intrinsicName,
- SmallVectorImpl<mlir::Value> &ops) {
-
- auto intTy = cast<cir::IntType>(ops[0].getType());
- unsigned numElts = intTy.getWidth();
- mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
- mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
- mlir::Type vecTy = lhsVec.getType();
- mlir::Value resVec = emitIntrinsicCallOp(builder, loc, intrinsicName, vecTy,
- mlir::ValueRange{lhsVec, rhsVec});
- return builder.createBitcast(resVec, ops[0].getType());
-}
-
-static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder,
- mlir::Location loc,
- const std::string &intrinsicName,
- SmallVectorImpl<mlir::Value> &ops) {
- unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
-
- // Convert both operands to mask vectors.
- mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems);
- mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems);
-
- mlir::Type i32Ty = builder.getSInt32Ty();
-
- // Create indices for extracting the first half of each vector.
- SmallVector<mlir::Attribute, 32> halfIndices;
- for (auto i : llvm::seq<unsigned>(0, numElems / 2))
- halfIndices.push_back(cir::IntAttr::get(i32Ty, i));
-
-  // Extract the first half of each vector. This gives better codegen than
-  // doing it in a single shuffle.
- mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices);
- mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices);
-
- // Create indices for concatenating the vectors.
- // NOTE: Operands are swapped to match the intrinsic definition.
- // After the half extraction, both vectors have numElems/2 elements.
- // In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1]
- // select from rhsHalf, and indices [numElems/2..numElems-1] select from
- // lhsHalf.
- SmallVector<mlir::Attribute, 64> concatIndices;
- for (auto i : llvm::seq<unsigned>(0, numElems))
- concatIndices.push_back(cir::IntAttr::get(i32Ty, i));
-
- // Concat the vectors (RHS first, then LHS).
- mlir::Value res =
- builder.createVecShuffle(loc, rhsHalf, lhsHalf, concatIndices);
- return builder.createBitcast(res, ops[0].getType());
-}
-
-static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
- mlir::Location loc,
- cir::BinOpKind binOpKind,
- SmallVectorImpl<mlir::Value> &ops,
- bool invertLHS = false) {
- unsigned numElts = cast<cir::IntType>(ops[0].getType()).getWidth();
- mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElts);
- mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElts);
-
- if (invertLHS)
- lhs = builder.createNot(lhs);
- return builder.createBitcast(builder.createBinop(loc, lhs, binOpKind, rhs),
- ops[0].getType());
-}
-
-static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc,
- const std::string &intrinsicName,
- SmallVectorImpl<mlir::Value> &ops) {
- auto intTy = cast<cir::IntType>(ops[0].getType());
- unsigned numElts = intTy.getWidth();
- mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
- mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
- mlir::Type resTy = builder.getSInt32Ty();
- return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy,
- mlir::ValueRange{lhsVec, rhsVec});
-}
-
-static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
- mlir::Value vec, mlir::Value value,
- mlir::Value indexOp) {
- unsigned numElts = cast<cir::VectorType>(vec.getType()).getSize();
-
- uint64_t index =
- indexOp.getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
-
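-  // numElts is a power of 2, so masking with numElts - 1 wraps the index
-  // into range, matching the hardware's modulo indexing behavior.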
- index &= numElts - 1;
-
- cir::ConstantOp indexVal = builder.getUInt64(index, loc);
-
- return cir::VecInsertOp::create(builder, loc, vec, value, indexVal);
-}
-
-static mlir::Value emitX86FunnelShift(CIRGenBuilderTy &builder,
- mlir::Location location, mlir::Value &op0,
- mlir::Value &op1, mlir::Value &amt,
- bool isRight) {
- mlir::Type op0Ty = op0.getType();
-
-  // The amount may be a scalar immediate, in which case we create a splat
-  // vector. Funnel shift amounts are taken modulo the bit width, and the
-  // types are all power-of-2 sized, so only the lowest log2 bits matter.
- if (amt.getType() != op0Ty) {
- auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
- uint64_t numElems = vecTy.getSize();
-
- auto amtTy = mlir::cast<cir::IntType>(amt.getType());
- auto vecElemTy = mlir::cast<cir::IntType>(vecTy.getElementType());
-
-    // If signed, cast to the same width but unsigned first to
-    // ensure zero-extension when casting to the bigger unsigned `vecElemTy`.
- if (amtTy.isSigned()) {
- cir::IntType unsignedAmtTy = builder.getUIntNTy(amtTy.getWidth());
- amt = builder.createIntCast(amt, unsignedAmtTy);
- }
- cir::IntType unsignedVecElemType = builder.getUIntNTy(vecElemTy.getWidth());
- amt = builder.createIntCast(amt, unsignedVecElemType);
- amt = cir::VecSplatOp::create(
- builder, location, cir::VectorType::get(unsignedVecElemType, numElems),
- amt);
- }
-
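-  // For example (values assumed for illustration): a rotate such as
-  // prold128(v, 3) reaches here with op0 == op1 == v and amt == 3, and
-  // becomes fshl(v, v, splat(3)).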
- const StringRef intrinsicName = isRight ? "fshr" : "fshl";
- return emitIntrinsicCallOp(builder, location, intrinsicName, op0Ty,
- mlir::ValueRange{op0, op1, amt});
-}
-
-static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
- bool isSigned,
- SmallVectorImpl<mlir::Value> &ops,
- unsigned opTypePrimitiveSizeInBits) {
- mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(),
- opTypePrimitiveSizeInBits / 64);
- mlir::Value lhs = builder.createBitcast(loc, ops[0], ty);
- mlir::Value rhs = builder.createBitcast(loc, ops[1], ty);
- if (isSigned) {
- cir::ConstantOp shiftAmt =
- builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32));
- cir::VecSplatOp shiftSplatVecOp =
- cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult());
- mlir::Value shiftSplatValue = shiftSplatVecOp.getResult();
-    // In CIR, right-shift operations are automatically lowered to either an
-    // arithmetic or a logical shift depending on the operand type. The shifts
-    // here propagate the sign bit of the 32-bit input into the upper bits of
-    // each 64-bit vector lane.
- lhs = builder.createShift(loc, lhs, shiftSplatValue, true);
- lhs = builder.createShift(loc, lhs, shiftSplatValue, false);
- rhs = builder.createShift(loc, rhs, shiftSplatValue, true);
- rhs = builder.createShift(loc, rhs, shiftSplatValue, false);
- } else {
- cir::ConstantOp maskScalar = builder.getConstant(
- loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff));
- cir::VecSplatOp mask =
- cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult());
-    // Clear the upper 32 bits of each 64-bit lane.
- lhs = builder.createAnd(loc, lhs, mask);
- rhs = builder.createAnd(loc, rhs, mask);
- }
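-  // At this point each 64-bit lane holds the sign- or zero-extended low
-  // 32 bits of its input, so a plain 64-bit multiply yields the full product.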
- return builder.createMul(loc, lhs, rhs);
-}
-
-static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
- llvm::SmallVector<mlir::Value> ops,
- bool isSigned) {
- mlir::Value op0 = ops[0];
- mlir::Value op1 = ops[1];
-
- cir::VectorType ty = cast<cir::VectorType>(op0.getType());
- cir::IntType elementTy = cast<cir::IntType>(ty.getElementType());
-
- uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7;
-
- cir::CmpOpKind pred;
- switch (imm) {
- case 0x0:
- pred = cir::CmpOpKind::lt;
- break;
- case 0x1:
- pred = cir::CmpOpKind::le;
- break;
- case 0x2:
- pred = cir::CmpOpKind::gt;
- break;
- case 0x3:
- pred = cir::CmpOpKind::ge;
- break;
- case 0x4:
- pred = cir::CmpOpKind::eq;
- break;
- case 0x5:
- pred = cir::CmpOpKind::ne;
- break;
- case 0x6:
- return builder.getNullValue(ty, loc); // FALSE
- case 0x7: {
- llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth());
- return cir::VecSplatOp::create(
- builder, loc, ty,
- builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE
- }
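-    // The rounding-mode immediate (ops[1]) is not inspected here; this
-    // lowering assumes the default rounding mode (_MM_FROUND_CUR_DIRECTION,
-    // the constant 4 passed by the tests).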
- default:
- llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
- }
-
- if ((!isSigned && elementTy.isSigned()) ||
- (isSigned && elementTy.isUnsigned())) {
- elementTy = elementTy.isSigned() ? builder.getUIntNTy(elementTy.getWidth())
- : builder.getSIntNTy(elementTy.getWidth());
- ty = cir::VectorType::get(elementTy, ty.getSize());
- op0 = builder.createBitcast(op0, ty);
- op1 = builder.createBitcast(op1, ty);
- }
-
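-  // For example (values assumed for illustration): vpcomub with imm == 1
-  // selects pred `le` and, because the builtin is unsigned, bitcasts signed
-  // operands to the equivalent unsigned vector type before comparing.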
- return builder.createVecCompare(loc, pred, op0, op1);
-}
-
-mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
- const CallExpr *expr) {
- if (builtinID == Builtin::BI__builtin_cpu_is) {
- cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
- return {};
- }
- if (builtinID == Builtin::BI__builtin_cpu_supports) {
- cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports");
- return {};
- }
- if (builtinID == Builtin::BI__builtin_cpu_init) {
- cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init");
- return {};
- }
-
- // Handle MSVC intrinsics before argument evaluation to prevent double
- // evaluation.
- assert(!cir::MissingFeatures::msvcBuiltins());
-
- // Find out if any arguments are required to be integer constant expressions.
- assert(!cir::MissingFeatures::handleBuiltinICEArguments());
-
- // The operands of the builtin call
- llvm::SmallVector<mlir::Value> ops;
-
-  // `iceArguments` is a bitmap: the i-th bit is set if the i-th argument is
-  // required to be an integer constant expression.
- unsigned iceArguments = 0;
- ASTContext::GetBuiltinTypeError error;
- getContext().GetBuiltinType(builtinID, error, &iceArguments);
- assert(error == ASTContext::GE_None && "Error while getting builtin type.");
-
- for (auto [idx, arg] : llvm::enumerate(expr->arguments()))
- ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg));
-
- CIRGenBuilderTy &builder = getBuilder();
- mlir::Type voidTy = builder.getVoidTy();
-
- switch (builtinID) {
- default:
- return {};
- case X86::BI_mm_clflush:
- return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
- "x86.sse2.clflush", voidTy, ops[0]);
- case X86::BI_mm_lfence:
- return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
- "x86.sse2.lfence", voidTy);
- case X86::BI_mm_pause:
- return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
- "x86.sse2.pause", voidTy);
- case X86::BI_mm_mfence:
- return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
- "x86.sse2.mfence", voidTy);
- case X86::BI_mm_sfence:
- return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
- "x86.sse.sfence", voidTy);
- case X86::BI_mm_prefetch:
- case X86::BI__rdtsc:
- case X86::BI__builtin_ia32_rdtscp: {
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- }
- case X86::BI__builtin_ia32_lzcnt_u16:
- case X86::BI__builtin_ia32_lzcnt_u32:
- case X86::BI__builtin_ia32_lzcnt_u64: {
- mlir::Location loc = getLoc(expr->getExprLoc());
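-    // lzcnt/tzcnt are defined for a zero input (they return the operand
-    // width), so the is_zero_poison flag on ctlz/cttz must be false.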
- mlir::Value isZeroPoison = builder.getFalse(loc);
- return emitIntrinsicCallOp(builder, loc, "ctlz", ops[0].getType(),
- mlir::ValueRange{ops[0], isZeroPoison});
- }
- case X86::BI__builtin_ia32_tzcnt_u16:
- case X86::BI__builtin_ia32_tzcnt_u32:
- case X86::BI__builtin_ia32_tzcnt_u64: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- mlir::Value isZeroPoison = builder.getFalse(loc);
- return emitIntrinsicCallOp(builder, loc, "cttz", ops[0].getType(),
- mlir::ValueRange{ops[0], isZeroPoison});
- }
- case X86::BI__builtin_ia32_undef128:
- case X86::BI__builtin_ia32_undef256:
- case X86::BI__builtin_ia32_undef512:
- // The x86 definition of "undef" is not the same as the LLVM definition
- // (PR32176). We leave optimizing away an unnecessary zero constant to the
- // IR optimizer and backend.
- // TODO: If we had a "freeze" IR instruction to generate a fixed undef
- // value, we should use that here instead of a zero.
- return builder.getNullValue(convertType(expr->getType()),
- getLoc(expr->getExprLoc()));
- case X86::BI__builtin_ia32_vec_ext_v4hi:
- case X86::BI__builtin_ia32_vec_ext_v16qi:
- case X86::BI__builtin_ia32_vec_ext_v8hi:
- case X86::BI__builtin_ia32_vec_ext_v4si:
- case X86::BI__builtin_ia32_vec_ext_v4sf:
- case X86::BI__builtin_ia32_vec_ext_v2di:
- case X86::BI__builtin_ia32_vec_ext_v32qi:
- case X86::BI__builtin_ia32_vec_ext_v16hi:
- case X86::BI__builtin_ia32_vec_ext_v8si:
- case X86::BI__builtin_ia32_vec_ext_v4di: {
- unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
-
- uint64_t index = getZExtIntValueFromConstOp(ops[1]);
- index &= numElts - 1;
-
- cir::ConstantOp indexVal =
- builder.getUInt64(index, getLoc(expr->getExprLoc()));
-
- // These builtins exist so we can ensure the index is an ICE and in range.
- // Otherwise we could just do this in the header file.
- return cir::VecExtractOp::create(builder, getLoc(expr->getExprLoc()),
- ops[0], indexVal);
- }
- case X86::BI__builtin_ia32_vec_set_v4hi:
- case X86::BI__builtin_ia32_vec_set_v16qi:
- case X86::BI__builtin_ia32_vec_set_v8hi:
- case X86::BI__builtin_ia32_vec_set_v4si:
- case X86::BI__builtin_ia32_vec_set_v2di:
- case X86::BI__builtin_ia32_vec_set_v32qi:
- case X86::BI__builtin_ia32_vec_set_v16hi:
- case X86::BI__builtin_ia32_vec_set_v8si:
- case X86::BI__builtin_ia32_vec_set_v4di: {
- return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1],
- ops[2]);
- }
- case X86::BI__builtin_ia32_kunpckhi:
- return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kunpackb", ops);
- case X86::BI__builtin_ia32_kunpcksi:
- return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kunpackw", ops);
- case X86::BI__builtin_ia32_kunpckdi:
- return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kunpackd", ops);
- case X86::BI_mm_setcsr:
- case X86::BI__builtin_ia32_ldmxcsr: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- Address tmp = createMemTemp(expr->getArg(0)->getType(), loc);
- builder.createStore(loc, ops[0], tmp);
- return emitIntrinsicCallOp(builder, loc, "x86.sse.ldmxcsr",
- builder.getVoidTy(), tmp.getPointer());
- }
- case X86::BI_mm_getcsr:
- case X86::BI__builtin_ia32_stmxcsr: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- Address tmp = createMemTemp(expr->getType(), loc);
- emitIntrinsicCallOp(builder, loc, "x86.sse.stmxcsr", builder.getVoidTy(),
- tmp.getPointer());
- return builder.createLoad(loc, tmp);
- }
- case X86::BI__builtin_ia32_xsave:
- case X86::BI__builtin_ia32_xsave64:
- case X86::BI__builtin_ia32_xrstor:
- case X86::BI__builtin_ia32_xrstor64:
- case X86::BI__builtin_ia32_xsaveopt:
- case X86::BI__builtin_ia32_xsaveopt64:
- case X86::BI__builtin_ia32_xrstors:
- case X86::BI__builtin_ia32_xrstors64:
- case X86::BI__builtin_ia32_xsavec:
- case X86::BI__builtin_ia32_xsavec64:
- case X86::BI__builtin_ia32_xsaves:
- case X86::BI__builtin_ia32_xsaves64:
- case X86::BI__builtin_ia32_xsetbv:
- case X86::BI_xsetbv:
- case X86::BI__builtin_ia32_xgetbv:
- case X86::BI_xgetbv:
- case X86::BI__builtin_ia32_storedqudi128_mask:
- case X86::BI__builtin_ia32_storedqusi128_mask:
- case X86::BI__builtin_ia32_storedquhi128_mask:
- case X86::BI__builtin_ia32_storedquqi128_mask:
- case X86::BI__builtin_ia32_storeupd128_mask:
- case X86::BI__builtin_ia32_storeups128_mask:
- case X86::BI__builtin_ia32_storedqudi256_mask:
- case X86::BI__builtin_ia32_storedqusi256_mask:
- case X86::BI__builtin_ia32_storedquhi256_mask:
- case X86::BI__builtin_ia32_storedquqi256_mask:
- case X86::BI__builtin_ia32_storeupd256_mask:
- case X86::BI__builtin_ia32_storeups256_mask:
- case X86::BI__builtin_ia32_storedqudi512_mask:
- case X86::BI__builtin_ia32_storedqusi512_mask:
- case X86::BI__builtin_ia32_storedquhi512_mask:
- case X86::BI__builtin_ia32_storedquqi512_mask:
- case X86::BI__builtin_ia32_storeupd512_mask:
- case X86::BI__builtin_ia32_storeups512_mask:
- case X86::BI__builtin_ia32_storesbf16128_mask:
- case X86::BI__builtin_ia32_storesh128_mask:
- case X86::BI__builtin_ia32_storess128_mask:
- case X86::BI__builtin_ia32_storesd128_mask:
- case X86::BI__builtin_ia32_cvtmask2b128:
- case X86::BI__builtin_ia32_cvtmask2b256:
- case X86::BI__builtin_ia32_cvtmask2b512:
- case X86::BI__builtin_ia32_cvtmask2w128:
- case X86::BI__builtin_ia32_cvtmask2w256:
- case X86::BI__builtin_ia32_cvtmask2w512:
- case X86::BI__builtin_ia32_cvtmask2d128:
- case X86::BI__builtin_ia32_cvtmask2d256:
- case X86::BI__builtin_ia32_cvtmask2d512:
- case X86::BI__builtin_ia32_cvtmask2q128:
- case X86::BI__builtin_ia32_cvtmask2q256:
- case X86::BI__builtin_ia32_cvtmask2q512:
- case X86::BI__builtin_ia32_cvtb2mask128:
- case X86::BI__builtin_ia32_cvtb2mask256:
- case X86::BI__builtin_ia32_cvtb2mask512:
- case X86::BI__builtin_ia32_cvtw2mask128:
- case X86::BI__builtin_ia32_cvtw2mask256:
- case X86::BI__builtin_ia32_cvtw2mask512:
- case X86::BI__builtin_ia32_cvtd2mask128:
- case X86::BI__builtin_ia32_cvtd2mask256:
- case X86::BI__builtin_ia32_cvtd2mask512:
- case X86::BI__builtin_ia32_cvtq2mask128:
- case X86::BI__builtin_ia32_cvtq2mask256:
- case X86::BI__builtin_ia32_cvtq2mask512:
- case X86::BI__builtin_ia32_cvtdq2ps512_mask:
- case X86::BI__builtin_ia32_cvtqq2ps512_mask:
- case X86::BI__builtin_ia32_cvtqq2pd512_mask:
- case X86::BI__builtin_ia32_vcvtw2ph512_mask:
- case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
- case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
- case X86::BI__builtin_ia32_cvtudq2ps512_mask:
- case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
- case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
- case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
- case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
- case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
- case X86::BI__builtin_ia32_vfmaddsh3_mask:
- case X86::BI__builtin_ia32_vfmaddss3_mask:
- case X86::BI__builtin_ia32_vfmaddsd3_mask:
- case X86::BI__builtin_ia32_vfmaddsh3_maskz:
- case X86::BI__builtin_ia32_vfmaddss3_maskz:
- case X86::BI__builtin_ia32_vfmaddsd3_maskz:
- case X86::BI__builtin_ia32_vfmaddsh3_mask3:
- case X86::BI__builtin_ia32_vfmaddss3_mask3:
- case X86::BI__builtin_ia32_vfmaddsd3_mask3:
- case X86::BI__builtin_ia32_vfmsubsh3_mask3:
- case X86::BI__builtin_ia32_vfmsubss3_mask3:
- case X86::BI__builtin_ia32_vfmsubsd3_mask3:
- case X86::BI__builtin_ia32_vfmaddph512_mask:
- case X86::BI__builtin_ia32_vfmaddph512_maskz:
- case X86::BI__builtin_ia32_vfmaddph512_mask3:
- case X86::BI__builtin_ia32_vfmaddps512_mask:
- case X86::BI__builtin_ia32_vfmaddps512_maskz:
- case X86::BI__builtin_ia32_vfmaddps512_mask3:
- case X86::BI__builtin_ia32_vfmsubps512_mask3:
- case X86::BI__builtin_ia32_vfmaddpd512_mask:
- case X86::BI__builtin_ia32_vfmaddpd512_maskz:
- case X86::BI__builtin_ia32_vfmaddpd512_mask3:
- case X86::BI__builtin_ia32_vfmsubpd512_mask3:
- case X86::BI__builtin_ia32_vfmsubph512_mask3:
- case X86::BI__builtin_ia32_vfmaddsubph512_mask:
- case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
- case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
- case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
- case X86::BI__builtin_ia32_vfmaddsubps512_mask:
- case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
- case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
- case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
- case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
- case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
- case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
- case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
- case X86::BI__builtin_ia32_movdqa32store128_mask:
- case X86::BI__builtin_ia32_movdqa64store128_mask:
- case X86::BI__builtin_ia32_storeaps128_mask:
- case X86::BI__builtin_ia32_storeapd128_mask:
- case X86::BI__builtin_ia32_movdqa32store256_mask:
- case X86::BI__builtin_ia32_movdqa64store256_mask:
- case X86::BI__builtin_ia32_storeaps256_mask:
- case X86::BI__builtin_ia32_storeapd256_mask:
- case X86::BI__builtin_ia32_movdqa32store512_mask:
- case X86::BI__builtin_ia32_movdqa64store512_mask:
- case X86::BI__builtin_ia32_storeaps512_mask:
- case X86::BI__builtin_ia32_storeapd512_mask:
- case X86::BI__builtin_ia32_loadups128_mask:
- case X86::BI__builtin_ia32_loadups256_mask:
- case X86::BI__builtin_ia32_loadups512_mask:
- case X86::BI__builtin_ia32_loadupd128_mask:
- case X86::BI__builtin_ia32_loadupd256_mask:
- case X86::BI__builtin_ia32_loadupd512_mask:
- case X86::BI__builtin_ia32_loaddquqi128_mask:
- case X86::BI__builtin_ia32_loaddquqi256_mask:
- case X86::BI__builtin_ia32_loaddquqi512_mask:
- case X86::BI__builtin_ia32_loaddquhi128_mask:
- case X86::BI__builtin_ia32_loaddquhi256_mask:
- case X86::BI__builtin_ia32_loaddquhi512_mask:
- case X86::BI__builtin_ia32_loaddqusi128_mask:
- case X86::BI__builtin_ia32_loaddqusi256_mask:
- case X86::BI__builtin_ia32_loaddqusi512_mask:
- case X86::BI__builtin_ia32_loaddqudi128_mask:
- case X86::BI__builtin_ia32_loaddqudi256_mask:
- case X86::BI__builtin_ia32_loaddqudi512_mask:
- case X86::BI__builtin_ia32_loadsbf16128_mask:
- case X86::BI__builtin_ia32_loadsh128_mask:
- case X86::BI__builtin_ia32_loadss128_mask:
- case X86::BI__builtin_ia32_loadsd128_mask:
- case X86::BI__builtin_ia32_loadaps128_mask:
- case X86::BI__builtin_ia32_loadaps256_mask:
- case X86::BI__builtin_ia32_loadaps512_mask:
- case X86::BI__builtin_ia32_loadapd128_mask:
- case X86::BI__builtin_ia32_loadapd256_mask:
- case X86::BI__builtin_ia32_loadapd512_mask:
- case X86::BI__builtin_ia32_movdqa32load128_mask:
- case X86::BI__builtin_ia32_movdqa32load256_mask:
- case X86::BI__builtin_ia32_movdqa32load512_mask:
- case X86::BI__builtin_ia32_movdqa64load128_mask:
- case X86::BI__builtin_ia32_movdqa64load256_mask:
- case X86::BI__builtin_ia32_movdqa64load512_mask:
- case X86::BI__builtin_ia32_expandloaddf128_mask:
- case X86::BI__builtin_ia32_expandloaddf256_mask:
- case X86::BI__builtin_ia32_expandloaddf512_mask:
- case X86::BI__builtin_ia32_expandloadsf128_mask:
- case X86::BI__builtin_ia32_expandloadsf256_mask:
- case X86::BI__builtin_ia32_expandloadsf512_mask:
- case X86::BI__builtin_ia32_expandloaddi128_mask:
- case X86::BI__builtin_ia32_expandloaddi256_mask:
- case X86::BI__builtin_ia32_expandloaddi512_mask:
- case X86::BI__builtin_ia32_expandloadsi128_mask:
- case X86::BI__builtin_ia32_expandloadsi256_mask:
- case X86::BI__builtin_ia32_expandloadsi512_mask:
- case X86::BI__builtin_ia32_expandloadhi128_mask:
- case X86::BI__builtin_ia32_expandloadhi256_mask:
- case X86::BI__builtin_ia32_expandloadhi512_mask:
- case X86::BI__builtin_ia32_expandloadqi128_mask:
- case X86::BI__builtin_ia32_expandloadqi256_mask:
- case X86::BI__builtin_ia32_expandloadqi512_mask:
- case X86::BI__builtin_ia32_compressstoredf128_mask:
- case X86::BI__builtin_ia32_compressstoredf256_mask:
- case X86::BI__builtin_ia32_compressstoredf512_mask:
- case X86::BI__builtin_ia32_compressstoresf128_mask:
- case X86::BI__builtin_ia32_compressstoresf256_mask:
- case X86::BI__builtin_ia32_compressstoresf512_mask:
- case X86::BI__builtin_ia32_compressstoredi128_mask:
- case X86::BI__builtin_ia32_compressstoredi256_mask:
- case X86::BI__builtin_ia32_compressstoredi512_mask:
- case X86::BI__builtin_ia32_compressstoresi128_mask:
- case X86::BI__builtin_ia32_compressstoresi256_mask:
- case X86::BI__builtin_ia32_compressstoresi512_mask:
- case X86::BI__builtin_ia32_compressstorehi128_mask:
- case X86::BI__builtin_ia32_compressstorehi256_mask:
- case X86::BI__builtin_ia32_compressstorehi512_mask:
- case X86::BI__builtin_ia32_compressstoreqi128_mask:
- case X86::BI__builtin_ia32_compressstoreqi256_mask:
- case X86::BI__builtin_ia32_compressstoreqi512_mask:
- case X86::BI__builtin_ia32_expanddf128_mask:
- case X86::BI__builtin_ia32_expanddf256_mask:
- case X86::BI__builtin_ia32_expanddf512_mask:
- case X86::BI__builtin_ia32_expandsf128_mask:
- case X86::BI__builtin_ia32_expandsf256_mask:
- case X86::BI__builtin_ia32_expandsf512_mask:
- case X86::BI__builtin_ia32_expanddi128_mask:
- case X86::BI__builtin_ia32_expanddi256_mask:
- case X86::BI__builtin_ia32_expanddi512_mask:
- case X86::BI__builtin_ia32_expandsi128_mask:
- case X86::BI__builtin_ia32_expandsi256_mask:
- case X86::BI__builtin_ia32_expandsi512_mask:
- case X86::BI__builtin_ia32_expandhi128_mask:
- case X86::BI__builtin_ia32_expandhi256_mask:
- case X86::BI__builtin_ia32_expandhi512_mask:
- case X86::BI__builtin_ia32_expandqi128_mask:
- case X86::BI__builtin_ia32_expandqi256_mask:
- case X86::BI__builtin_ia32_expandqi512_mask:
- case X86::BI__builtin_ia32_compressdf128_mask:
- case X86::BI__builtin_ia32_compressdf256_mask:
- case X86::BI__builtin_ia32_compressdf512_mask:
- case X86::BI__builtin_ia32_compresssf128_mask:
- case X86::BI__builtin_ia32_compresssf256_mask:
- case X86::BI__builtin_ia32_compresssf512_mask:
- case X86::BI__builtin_ia32_compressdi128_mask:
- case X86::BI__builtin_ia32_compressdi256_mask:
- case X86::BI__builtin_ia32_compressdi512_mask:
- case X86::BI__builtin_ia32_compresssi128_mask:
- case X86::BI__builtin_ia32_compresssi256_mask:
- case X86::BI__builtin_ia32_compresssi512_mask:
- case X86::BI__builtin_ia32_compresshi128_mask:
- case X86::BI__builtin_ia32_compresshi256_mask:
- case X86::BI__builtin_ia32_compresshi512_mask:
- case X86::BI__builtin_ia32_compressqi128_mask:
- case X86::BI__builtin_ia32_compressqi256_mask:
- case X86::BI__builtin_ia32_compressqi512_mask:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- case X86::BI__builtin_ia32_gather3div2df:
- case X86::BI__builtin_ia32_gather3div2di:
- case X86::BI__builtin_ia32_gather3div4df:
- case X86::BI__builtin_ia32_gather3div4di:
- case X86::BI__builtin_ia32_gather3div4sf:
- case X86::BI__builtin_ia32_gather3div4si:
- case X86::BI__builtin_ia32_gather3div8sf:
- case X86::BI__builtin_ia32_gather3div8si:
- case X86::BI__builtin_ia32_gather3siv2df:
- case X86::BI__builtin_ia32_gather3siv2di:
- case X86::BI__builtin_ia32_gather3siv4df:
- case X86::BI__builtin_ia32_gather3siv4di:
- case X86::BI__builtin_ia32_gather3siv4sf:
- case X86::BI__builtin_ia32_gather3siv4si:
- case X86::BI__builtin_ia32_gather3siv8sf:
- case X86::BI__builtin_ia32_gather3siv8si:
- case X86::BI__builtin_ia32_gathersiv8df:
- case X86::BI__builtin_ia32_gathersiv16sf:
- case X86::BI__builtin_ia32_gatherdiv8df:
- case X86::BI__builtin_ia32_gatherdiv16sf:
- case X86::BI__builtin_ia32_gathersiv8di:
- case X86::BI__builtin_ia32_gathersiv16si:
- case X86::BI__builtin_ia32_gatherdiv8di:
- case X86::BI__builtin_ia32_gatherdiv16si: {
- StringRef intrinsicName;
- switch (builtinID) {
- default:
- llvm_unreachable("Unexpected builtin");
- case X86::BI__builtin_ia32_gather3div2df:
- intrinsicName = "x86.avx512.mask.gather3div2.df";
- break;
- case X86::BI__builtin_ia32_gather3div2di:
- intrinsicName = "x86.avx512.mask.gather3div2.di";
- break;
- case X86::BI__builtin_ia32_gather3div4df:
- intrinsicName = "x86.avx512.mask.gather3div4.df";
- break;
- case X86::BI__builtin_ia32_gather3div4di:
- intrinsicName = "x86.avx512.mask.gather3div4.di";
- break;
- case X86::BI__builtin_ia32_gather3div4sf:
- intrinsicName = "x86.avx512.mask.gather3div4.sf";
- break;
- case X86::BI__builtin_ia32_gather3div4si:
- intrinsicName = "x86.avx512.mask.gather3div4.si";
- break;
- case X86::BI__builtin_ia32_gather3div8sf:
- intrinsicName = "x86.avx512.mask.gather3div8.sf";
- break;
- case X86::BI__builtin_ia32_gather3div8si:
- intrinsicName = "x86.avx512.mask.gather3div8.si";
- break;
- case X86::BI__builtin_ia32_gather3siv2df:
- intrinsicName = "x86.avx512.mask.gather3siv2.df";
- break;
- case X86::BI__builtin_ia32_gather3siv2di:
- intrinsicName = "x86.avx512.mask.gather3siv2.di";
- break;
- case X86::BI__builtin_ia32_gather3siv4df:
- intrinsicName = "x86.avx512.mask.gather3siv4.df";
- break;
- case X86::BI__builtin_ia32_gather3siv4di:
- intrinsicName = "x86.avx512.mask.gather3siv4.di";
- break;
- case X86::BI__builtin_ia32_gather3siv4sf:
- intrinsicName = "x86.avx512.mask.gather3siv4.sf";
- break;
- case X86::BI__builtin_ia32_gather3siv4si:
- intrinsicName = "x86.avx512.mask.gather3siv4.si";
- break;
- case X86::BI__builtin_ia32_gather3siv8sf:
- intrinsicName = "x86.avx512.mask.gather3siv8.sf";
- break;
- case X86::BI__builtin_ia32_gather3siv8si:
- intrinsicName = "x86.avx512.mask.gather3siv8.si";
- break;
- case X86::BI__builtin_ia32_gathersiv8df:
- intrinsicName = "x86.avx512.mask.gather.dpd.512";
- break;
- case X86::BI__builtin_ia32_gathersiv16sf:
- intrinsicName = "x86.avx512.mask.gather.dps.512";
- break;
- case X86::BI__builtin_ia32_gatherdiv8df:
- intrinsicName = "x86.avx512.mask.gather.qpd.512";
- break;
- case X86::BI__builtin_ia32_gatherdiv16sf:
- intrinsicName = "x86.avx512.mask.gather.qps.512";
- break;
- case X86::BI__builtin_ia32_gathersiv8di:
- intrinsicName = "x86.avx512.mask.gather.dpq.512";
- break;
- case X86::BI__builtin_ia32_gathersiv16si:
- intrinsicName = "x86.avx512.mask.gather.dpi.512";
- break;
- case X86::BI__builtin_ia32_gatherdiv8di:
- intrinsicName = "x86.avx512.mask.gather.qpq.512";
- break;
- case X86::BI__builtin_ia32_gatherdiv16si:
- intrinsicName = "x86.avx512.mask.gather.qpi.512";
- break;
- }
-
- mlir::Location loc = getLoc(expr->getExprLoc());
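-    // The mask covers min(result elements, index elements); convert the
-    // integer mask argument to a vector of that many i1 elements.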
- unsigned minElts =
- std::min(cast<cir::VectorType>(ops[0].getType()).getSize(),
- cast<cir::VectorType>(ops[2].getType()).getSize());
- ops[3] = getMaskVecValue(builder, loc, ops[3], minElts);
- return emitIntrinsicCallOp(builder, loc, intrinsicName,
- convertType(expr->getType()), ops);
- }
- case X86::BI__builtin_ia32_scattersiv8df:
- case X86::BI__builtin_ia32_scattersiv16sf:
- case X86::BI__builtin_ia32_scatterdiv8df:
- case X86::BI__builtin_ia32_scatterdiv16sf:
- case X86::BI__builtin_ia32_scattersiv8di:
- case X86::BI__builtin_ia32_scattersiv16si:
- case X86::BI__builtin_ia32_scatterdiv8di:
- case X86::BI__builtin_ia32_scatterdiv16si:
- case X86::BI__builtin_ia32_scatterdiv2df:
- case X86::BI__builtin_ia32_scatterdiv2di:
- case X86::BI__builtin_ia32_scatterdiv4df:
- case X86::BI__builtin_ia32_scatterdiv4di:
- case X86::BI__builtin_ia32_scatterdiv4sf:
- case X86::BI__builtin_ia32_scatterdiv4si:
- case X86::BI__builtin_ia32_scatterdiv8sf:
- case X86::BI__builtin_ia32_scatterdiv8si:
- case X86::BI__builtin_ia32_scattersiv2df:
- case X86::BI__builtin_ia32_scattersiv2di:
- case X86::BI__builtin_ia32_scattersiv4df:
- case X86::BI__builtin_ia32_scattersiv4di:
- case X86::BI__builtin_ia32_scattersiv4sf:
- case X86::BI__builtin_ia32_scattersiv4si:
- case X86::BI__builtin_ia32_scattersiv8sf:
- case X86::BI__builtin_ia32_scattersiv8si: {
- llvm::StringRef intrinsicName;
- switch (builtinID) {
- default:
- llvm_unreachable("Unexpected builtin");
- case X86::BI__builtin_ia32_scattersiv8df:
- intrinsicName = "x86.avx512.mask.scatter.dpd.512";
- break;
- case X86::BI__builtin_ia32_scattersiv16sf:
- intrinsicName = "x86.avx512.mask.scatter.dps.512";
- break;
- case X86::BI__builtin_ia32_scatterdiv8df:
- intrinsicName = "x86.avx512.mask.scatter.qpd.512";
- break;
- case X86::BI__builtin_ia32_scatterdiv16sf:
- intrinsicName = "x86.avx512.mask.scatter.qps.512";
- break;
- case X86::BI__builtin_ia32_scattersiv8di:
- intrinsicName = "x86.avx512.mask.scatter.dpq.512";
- break;
- case X86::BI__builtin_ia32_scattersiv16si:
- intrinsicName = "x86.avx512.mask.scatter.dpi.512";
- break;
- case X86::BI__builtin_ia32_scatterdiv8di:
- intrinsicName = "x86.avx512.mask.scatter.qpq.512";
- break;
- case X86::BI__builtin_ia32_scatterdiv16si:
- intrinsicName = "x86.avx512.mask.scatter.qpi.512";
- break;
- case X86::BI__builtin_ia32_scatterdiv2df:
- intrinsicName = "x86.avx512.mask.scatterdiv2.df";
- break;
- case X86::BI__builtin_ia32_scatterdiv2di:
- intrinsicName = "x86.avx512.mask.scatterdiv2.di";
- break;
- case X86::BI__builtin_ia32_scatterdiv4df:
- intrinsicName = "x86.avx512.mask.scatterdiv4.df";
- break;
- case X86::BI__builtin_ia32_scatterdiv4di:
- intrinsicName = "x86.avx512.mask.scatterdiv4.di";
- break;
- case X86::BI__builtin_ia32_scatterdiv4sf:
- intrinsicName = "x86.avx512.mask.scatterdiv4.sf";
- break;
- case X86::BI__builtin_ia32_scatterdiv4si:
- intrinsicName = "x86.avx512.mask.scatterdiv4.si";
- break;
- case X86::BI__builtin_ia32_scatterdiv8sf:
- intrinsicName = "x86.avx512.mask.scatterdiv8.sf";
- break;
- case X86::BI__builtin_ia32_scatterdiv8si:
- intrinsicName = "x86.avx512.mask.scatterdiv8.si";
- break;
- case X86::BI__builtin_ia32_scattersiv2df:
- intrinsicName = "x86.avx512.mask.scattersiv2.df";
- break;
- case X86::BI__builtin_ia32_scattersiv2di:
- intrinsicName = "x86.avx512.mask.scattersiv2.di";
- break;
- case X86::BI__builtin_ia32_scattersiv4df:
- intrinsicName = "x86.avx512.mask.scattersiv4.df";
- break;
- case X86::BI__builtin_ia32_scattersiv4di:
- intrinsicName = "x86.avx512.mask.scattersiv4.di";
- break;
- case X86::BI__builtin_ia32_scattersiv4sf:
- intrinsicName = "x86.avx512.mask.scattersiv4.sf";
- break;
- case X86::BI__builtin_ia32_scattersiv4si:
- intrinsicName = "x86.avx512.mask.scattersiv4.si";
- break;
- case X86::BI__builtin_ia32_scattersiv8sf:
- intrinsicName = "x86.avx512.mask.scattersiv8.sf";
- break;
- case X86::BI__builtin_ia32_scattersiv8si:
- intrinsicName = "x86.avx512.mask.scattersiv8.si";
- break;
- }
-
- mlir::Location loc = getLoc(expr->getExprLoc());
- unsigned minElts =
- std::min(cast<cir::VectorType>(ops[2].getType()).getSize(),
- cast<cir::VectorType>(ops[3].getType()).getSize());
- ops[1] = getMaskVecValue(builder, loc, ops[1], minElts);
-
- return emitIntrinsicCallOp(builder, loc, intrinsicName,
- convertType(expr->getType()), ops);
- }
- case X86::BI__builtin_ia32_vextractf128_pd256:
- case X86::BI__builtin_ia32_vextractf128_ps256:
- case X86::BI__builtin_ia32_vextractf128_si256:
- case X86::BI__builtin_ia32_extract128i256:
- case X86::BI__builtin_ia32_extractf64x4_mask:
- case X86::BI__builtin_ia32_extractf32x4_mask:
- case X86::BI__builtin_ia32_extracti64x4_mask:
- case X86::BI__builtin_ia32_extracti32x4_mask:
- case X86::BI__builtin_ia32_extractf32x8_mask:
- case X86::BI__builtin_ia32_extracti32x8_mask:
- case X86::BI__builtin_ia32_extractf32x4_256_mask:
- case X86::BI__builtin_ia32_extracti32x4_256_mask:
- case X86::BI__builtin_ia32_extractf64x2_256_mask:
- case X86::BI__builtin_ia32_extracti64x2_256_mask:
- case X86::BI__builtin_ia32_extractf64x2_512_mask:
- case X86::BI__builtin_ia32_extracti64x2_512_mask:
- case X86::BI__builtin_ia32_vinsertf128_pd256:
- case X86::BI__builtin_ia32_vinsertf128_ps256:
- case X86::BI__builtin_ia32_vinsertf128_si256:
- case X86::BI__builtin_ia32_insert128i256:
- case X86::BI__builtin_ia32_insertf64x4:
- case X86::BI__builtin_ia32_insertf32x4:
- case X86::BI__builtin_ia32_inserti64x4:
- case X86::BI__builtin_ia32_inserti32x4:
- case X86::BI__builtin_ia32_insertf32x8:
- case X86::BI__builtin_ia32_inserti32x8:
- case X86::BI__builtin_ia32_insertf32x4_256:
- case X86::BI__builtin_ia32_inserti32x4_256:
- case X86::BI__builtin_ia32_insertf64x2_256:
- case X86::BI__builtin_ia32_inserti64x2_256:
- case X86::BI__builtin_ia32_insertf64x2_512:
- case X86::BI__builtin_ia32_inserti64x2_512:
- case X86::BI__builtin_ia32_pmovqd512_mask:
- case X86::BI__builtin_ia32_pmovwb512_mask:
- case X86::BI__builtin_ia32_pblendw128:
- case X86::BI__builtin_ia32_blendpd:
- case X86::BI__builtin_ia32_blendps:
- case X86::BI__builtin_ia32_blendpd256:
- case X86::BI__builtin_ia32_blendps256:
- case X86::BI__builtin_ia32_pblendw256:
- case X86::BI__builtin_ia32_pblendd128:
- case X86::BI__builtin_ia32_pblendd256:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- case X86::BI__builtin_ia32_pshuflw:
- case X86::BI__builtin_ia32_pshuflw256:
- case X86::BI__builtin_ia32_pshuflw512:
- return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
- true);
- case X86::BI__builtin_ia32_pshufhw:
- case X86::BI__builtin_ia32_pshufhw256:
- case X86::BI__builtin_ia32_pshufhw512:
- return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
- false);
- case X86::BI__builtin_ia32_pshufd:
- case X86::BI__builtin_ia32_pshufd256:
- case X86::BI__builtin_ia32_pshufd512:
- case X86::BI__builtin_ia32_vpermilpd:
- case X86::BI__builtin_ia32_vpermilps:
- case X86::BI__builtin_ia32_vpermilpd256:
- case X86::BI__builtin_ia32_vpermilps256:
- case X86::BI__builtin_ia32_vpermilpd512:
- case X86::BI__builtin_ia32_vpermilps512: {
- const uint32_t imm = getSExtIntValueFromConstOp(ops[1]);
-
- llvm::SmallVector<int64_t, 16> mask(16);
- computeFullLaneShuffleMask(*this, ops[0], imm, false, mask);
-
- return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], mask);
- }
- case X86::BI__builtin_ia32_shufpd:
- case X86::BI__builtin_ia32_shufpd256:
- case X86::BI__builtin_ia32_shufpd512:
- case X86::BI__builtin_ia32_shufps:
- case X86::BI__builtin_ia32_shufps256:
- case X86::BI__builtin_ia32_shufps512: {
- const uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
-
- llvm::SmallVector<int64_t, 16> mask(16);
- computeFullLaneShuffleMask(*this, ops[0], imm, true, mask);
-
- return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
- mask);
- }
- case X86::BI__builtin_ia32_permdi256:
- case X86::BI__builtin_ia32_permdf256:
- case X86::BI__builtin_ia32_permdi512:
- case X86::BI__builtin_ia32_permdf512:
- case X86::BI__builtin_ia32_palignr128:
- case X86::BI__builtin_ia32_palignr256:
- case X86::BI__builtin_ia32_palignr512:
- case X86::BI__builtin_ia32_alignd128:
- case X86::BI__builtin_ia32_alignd256:
- case X86::BI__builtin_ia32_alignd512:
- case X86::BI__builtin_ia32_alignq128:
- case X86::BI__builtin_ia32_alignq256:
- case X86::BI__builtin_ia32_alignq512:
- case X86::BI__builtin_ia32_shuf_f32x4_256:
- case X86::BI__builtin_ia32_shuf_f64x2_256:
- case X86::BI__builtin_ia32_shuf_i32x4_256:
- case X86::BI__builtin_ia32_shuf_i64x2_256:
- case X86::BI__builtin_ia32_shuf_f32x4:
- case X86::BI__builtin_ia32_shuf_f64x2:
- case X86::BI__builtin_ia32_shuf_i32x4:
- case X86::BI__builtin_ia32_shuf_i64x2:
- case X86::BI__builtin_ia32_vperm2f128_pd256:
- case X86::BI__builtin_ia32_vperm2f128_ps256:
- case X86::BI__builtin_ia32_vperm2f128_si256:
- case X86::BI__builtin_ia32_permti256:
- case X86::BI__builtin_ia32_pslldqi128_byteshift:
- case X86::BI__builtin_ia32_pslldqi256_byteshift:
- case X86::BI__builtin_ia32_pslldqi512_byteshift:
- case X86::BI__builtin_ia32_psrldqi128_byteshift:
- case X86::BI__builtin_ia32_psrldqi256_byteshift:
- case X86::BI__builtin_ia32_psrldqi512_byteshift:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- case X86::BI__builtin_ia32_kshiftliqi:
- case X86::BI__builtin_ia32_kshiftlihi:
- case X86::BI__builtin_ia32_kshiftlisi:
- case X86::BI__builtin_ia32_kshiftlidi: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- unsigned shiftVal =
- ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
- 0xff;
- unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
-
- if (shiftVal >= numElems)
- return builder.getNullValue(ops[0].getType(), loc);
-
- mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
-
- SmallVector<mlir::Attribute, 64> indices;
- mlir::Type i32Ty = builder.getSInt32Ty();
- for (auto i : llvm::seq<unsigned>(0, numElems))
- indices.push_back(cir::IntAttr::get(i32Ty, numElems + i - shiftVal));
-
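-    // Worked example (values assumed): with numElems == 8 and shiftVal == 2
-    // the indices are [6..13]; in the shuffle below, indices 0..7 select from
-    // `zero` and 8..15 from `in`, so the result is `in` shifted toward the
-    // high bits by 2 with zeros shifted in.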
- mlir::Value zero = builder.getNullValue(in.getType(), loc);
- mlir::Value sv = builder.createVecShuffle(loc, zero, in, indices);
- return builder.createBitcast(sv, ops[0].getType());
- }
- case X86::BI__builtin_ia32_kshiftriqi:
- case X86::BI__builtin_ia32_kshiftrihi:
- case X86::BI__builtin_ia32_kshiftrisi:
- case X86::BI__builtin_ia32_kshiftridi: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- unsigned shiftVal =
- ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
- 0xff;
- unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
-
- if (shiftVal >= numElems)
- return builder.getNullValue(ops[0].getType(), loc);
-
- mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
-
- SmallVector<mlir::Attribute, 64> indices;
- mlir::Type i32Ty = builder.getSInt32Ty();
- for (auto i : llvm::seq<unsigned>(0, numElems))
- indices.push_back(cir::IntAttr::get(i32Ty, i + shiftVal));
-
- mlir::Value zero = builder.getNullValue(in.getType(), loc);
- mlir::Value sv = builder.createVecShuffle(loc, in, zero, indices);
- return builder.createBitcast(sv, ops[0].getType());
- }
- case X86::BI__builtin_ia32_vprotbi:
- case X86::BI__builtin_ia32_vprotwi:
- case X86::BI__builtin_ia32_vprotdi:
- case X86::BI__builtin_ia32_vprotqi:
- case X86::BI__builtin_ia32_prold128:
- case X86::BI__builtin_ia32_prold256:
- case X86::BI__builtin_ia32_prold512:
- case X86::BI__builtin_ia32_prolq128:
- case X86::BI__builtin_ia32_prolq256:
- case X86::BI__builtin_ia32_prolq512:
- return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
- ops[0], ops[1], false);
- case X86::BI__builtin_ia32_prord128:
- case X86::BI__builtin_ia32_prord256:
- case X86::BI__builtin_ia32_prord512:
- case X86::BI__builtin_ia32_prorq128:
- case X86::BI__builtin_ia32_prorq256:
- case X86::BI__builtin_ia32_prorq512:
- return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
- ops[0], ops[1], true);
- case X86::BI__builtin_ia32_selectb_128:
- case X86::BI__builtin_ia32_selectb_256:
- case X86::BI__builtin_ia32_selectb_512:
- case X86::BI__builtin_ia32_selectw_128:
- case X86::BI__builtin_ia32_selectw_256:
- case X86::BI__builtin_ia32_selectw_512:
- case X86::BI__builtin_ia32_selectd_128:
- case X86::BI__builtin_ia32_selectd_256:
- case X86::BI__builtin_ia32_selectd_512:
- case X86::BI__builtin_ia32_selectq_128:
- case X86::BI__builtin_ia32_selectq_256:
- case X86::BI__builtin_ia32_selectq_512:
- case X86::BI__builtin_ia32_selectph_128:
- case X86::BI__builtin_ia32_selectph_256:
- case X86::BI__builtin_ia32_selectph_512:
- case X86::BI__builtin_ia32_selectpbf_128:
- case X86::BI__builtin_ia32_selectpbf_256:
- case X86::BI__builtin_ia32_selectpbf_512:
- case X86::BI__builtin_ia32_selectps_128:
- case X86::BI__builtin_ia32_selectps_256:
- case X86::BI__builtin_ia32_selectps_512:
- case X86::BI__builtin_ia32_selectpd_128:
- case X86::BI__builtin_ia32_selectpd_256:
- case X86::BI__builtin_ia32_selectpd_512:
- case X86::BI__builtin_ia32_selectsh_128:
- case X86::BI__builtin_ia32_selectsbf_128:
- case X86::BI__builtin_ia32_selectss_128:
- case X86::BI__builtin_ia32_selectsd_128:
- case X86::BI__builtin_ia32_cmpb128_mask:
- case X86::BI__builtin_ia32_cmpb256_mask:
- case X86::BI__builtin_ia32_cmpb512_mask:
- case X86::BI__builtin_ia32_cmpw128_mask:
- case X86::BI__builtin_ia32_cmpw256_mask:
- case X86::BI__builtin_ia32_cmpw512_mask:
- case X86::BI__builtin_ia32_cmpd128_mask:
- case X86::BI__builtin_ia32_cmpd256_mask:
- case X86::BI__builtin_ia32_cmpd512_mask:
- case X86::BI__builtin_ia32_cmpq128_mask:
- case X86::BI__builtin_ia32_cmpq256_mask:
- case X86::BI__builtin_ia32_cmpq512_mask:
- case X86::BI__builtin_ia32_ucmpb128_mask:
- case X86::BI__builtin_ia32_ucmpb256_mask:
- case X86::BI__builtin_ia32_ucmpb512_mask:
- case X86::BI__builtin_ia32_ucmpw128_mask:
- case X86::BI__builtin_ia32_ucmpw256_mask:
- case X86::BI__builtin_ia32_ucmpw512_mask:
- case X86::BI__builtin_ia32_ucmpd128_mask:
- case X86::BI__builtin_ia32_ucmpd256_mask:
- case X86::BI__builtin_ia32_ucmpd512_mask:
- case X86::BI__builtin_ia32_ucmpq128_mask:
- case X86::BI__builtin_ia32_ucmpq256_mask:
- case X86::BI__builtin_ia32_ucmpq512_mask:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- case X86::BI__builtin_ia32_vpcomb:
- case X86::BI__builtin_ia32_vpcomw:
- case X86::BI__builtin_ia32_vpcomd:
- case X86::BI__builtin_ia32_vpcomq:
- return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
- case X86::BI__builtin_ia32_vpcomub:
- case X86::BI__builtin_ia32_vpcomuw:
- case X86::BI__builtin_ia32_vpcomud:
- case X86::BI__builtin_ia32_vpcomuq:
- return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
- case X86::BI__builtin_ia32_kortestcqi:
- case X86::BI__builtin_ia32_kortestchi:
- case X86::BI__builtin_ia32_kortestcsi:
- case X86::BI__builtin_ia32_kortestcdi: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- cir::IntType ty = cast<cir::IntType>(ops[0].getType());
- mlir::Value allOnesOp =
- builder.getConstAPInt(loc, ty, APInt::getAllOnes(ty.getWidth()));
- mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
- mlir::Value cmp =
- cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp);
- return builder.createCast(cir::CastKind::bool_to_int, cmp,
- cgm.convertType(expr->getType()));
- }
- case X86::BI__builtin_ia32_kortestzqi:
- case X86::BI__builtin_ia32_kortestzhi:
- case X86::BI__builtin_ia32_kortestzsi:
- case X86::BI__builtin_ia32_kortestzdi: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- cir::IntType ty = cast<cir::IntType>(ops[0].getType());
- mlir::Value allZerosOp = builder.getNullValue(ty, loc).getResult();
- mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
- mlir::Value cmp =
- cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp);
- return builder.createCast(cir::CastKind::bool_to_int, cmp,
- cgm.convertType(expr->getType()));
- }
- case X86::BI__builtin_ia32_ktestcqi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestc.b", ops);
- case X86::BI__builtin_ia32_ktestzqi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestz.b", ops);
- case X86::BI__builtin_ia32_ktestchi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestc.w", ops);
- case X86::BI__builtin_ia32_ktestzhi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestz.w", ops);
- case X86::BI__builtin_ia32_ktestcsi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestc.d", ops);
- case X86::BI__builtin_ia32_ktestzsi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestz.d", ops);
- case X86::BI__builtin_ia32_ktestcdi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestc.q", ops);
- case X86::BI__builtin_ia32_ktestzdi:
- return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.ktestz.q", ops);
- case X86::BI__builtin_ia32_kaddqi:
- return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kadd.b", ops);
- case X86::BI__builtin_ia32_kaddhi:
- return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kadd.w", ops);
- case X86::BI__builtin_ia32_kaddsi:
- return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kadd.d", ops);
- case X86::BI__builtin_ia32_kadddi:
- return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
- "x86.avx512.kadd.q", ops);
- case X86::BI__builtin_ia32_kandqi:
- case X86::BI__builtin_ia32_kandhi:
- case X86::BI__builtin_ia32_kandsi:
- case X86::BI__builtin_ia32_kanddi:
- return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
- cir::BinOpKind::And, ops);
- case X86::BI__builtin_ia32_kandnqi:
- case X86::BI__builtin_ia32_kandnhi:
- case X86::BI__builtin_ia32_kandnsi:
- case X86::BI__builtin_ia32_kandndi:
- return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
- cir::BinOpKind::And, ops, true);
- case X86::BI__builtin_ia32_korqi:
- case X86::BI__builtin_ia32_korhi:
- case X86::BI__builtin_ia32_korsi:
- case X86::BI__builtin_ia32_kordi:
- return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
- cir::BinOpKind::Or, ops);
- case X86::BI__builtin_ia32_kxnorqi:
- case X86::BI__builtin_ia32_kxnorhi:
- case X86::BI__builtin_ia32_kxnorsi:
- case X86::BI__builtin_ia32_kxnordi:
- return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
- cir::BinOpKind::Xor, ops, true);
- case X86::BI__builtin_ia32_kxorqi:
- case X86::BI__builtin_ia32_kxorhi:
- case X86::BI__builtin_ia32_kxorsi:
- case X86::BI__builtin_ia32_kxordi:
- return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
- cir::BinOpKind::Xor, ops);
- case X86::BI__builtin_ia32_knotqi:
- case X86::BI__builtin_ia32_knothi:
- case X86::BI__builtin_ia32_knotsi:
- case X86::BI__builtin_ia32_knotdi: {
- cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
- unsigned numElts = intTy.getWidth();
- mlir::Value resVec =
- getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
- return builder.createBitcast(builder.createNot(resVec), ops[0].getType());
- }
- case X86::BI__builtin_ia32_kmovb:
- case X86::BI__builtin_ia32_kmovw:
- case X86::BI__builtin_ia32_kmovd:
- case X86::BI__builtin_ia32_kmovq: {
- // Bitcast to vXi1 type and then back to integer. This gets the mask
- // register type into the IR, but might be optimized out depending on
- // what's around it.
- cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
- unsigned numElts = intTy.getWidth();
- mlir::Value resVec =
- getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
- return builder.createBitcast(resVec, ops[0].getType());
- }
- case X86::BI__builtin_ia32_sqrtsh_round_mask:
- case X86::BI__builtin_ia32_sqrtsd_round_mask:
- case X86::BI__builtin_ia32_sqrtss_round_mask:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- case X86::BI__builtin_ia32_sqrtph512:
- case X86::BI__builtin_ia32_sqrtps512:
- case X86::BI__builtin_ia32_sqrtpd512: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- mlir::Value arg = ops[0];
- return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
- }
- case X86::BI__builtin_ia32_pmuludq128:
- case X86::BI__builtin_ia32_pmuludq256:
- case X86::BI__builtin_ia32_pmuludq512: {
- unsigned opTypePrimitiveSizeInBits =
- cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
- return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ false,
- ops, opTypePrimitiveSizeInBits);
- }
- case X86::BI__builtin_ia32_pmuldq128:
- case X86::BI__builtin_ia32_pmuldq256:
- case X86::BI__builtin_ia32_pmuldq512: {
- unsigned opTypePrimitiveSizeInBits =
- cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
- return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned*/ true,
- ops, opTypePrimitiveSizeInBits);
- }
- case X86::BI__builtin_ia32_pternlogd512_mask:
- case X86::BI__builtin_ia32_pternlogq512_mask:
- case X86::BI__builtin_ia32_pternlogd128_mask:
- case X86::BI__builtin_ia32_pternlogd256_mask:
- case X86::BI__builtin_ia32_pternlogq128_mask:
- case X86::BI__builtin_ia32_pternlogq256_mask:
- case X86::BI__builtin_ia32_pternlogd512_maskz:
- case X86::BI__builtin_ia32_pternlogq512_maskz:
- case X86::BI__builtin_ia32_pternlogd128_maskz:
- case X86::BI__builtin_ia32_pternlogd256_maskz:
- case X86::BI__builtin_ia32_pternlogq128_maskz:
- case X86::BI__builtin_ia32_pternlogq256_maskz:
- case X86::BI__builtin_ia32_vpshldd128:
- case X86::BI__builtin_ia32_vpshldd256:
- case X86::BI__builtin_ia32_vpshldd512:
- case X86::BI__builtin_ia32_vpshldq128:
- case X86::BI__builtin_ia32_vpshldq256:
- case X86::BI__builtin_ia32_vpshldq512:
- case X86::BI__builtin_ia32_vpshldw128:
- case X86::BI__builtin_ia32_vpshldw256:
- case X86::BI__builtin_ia32_vpshldw512:
- case X86::BI__builtin_ia32_vpshrdd128:
- case X86::BI__builtin_ia32_vpshrdd256:
- case X86::BI__builtin_ia32_vpshrdd512:
- case X86::BI__builtin_ia32_vpshrdq128:
- case X86::BI__builtin_ia32_vpshrdq256:
- case X86::BI__builtin_ia32_vpshrdq512:
- case X86::BI__builtin_ia32_vpshrdw128:
- case X86::BI__builtin_ia32_vpshrdw256:
- case X86::BI__builtin_ia32_vpshrdw512:
- case X86::BI__builtin_ia32_reduce_fadd_pd512:
- case X86::BI__builtin_ia32_reduce_fadd_ps512:
- case X86::BI__builtin_ia32_reduce_fadd_ph512:
- case X86::BI__builtin_ia32_reduce_fadd_ph256:
- case X86::BI__builtin_ia32_reduce_fadd_ph128:
- case X86::BI__builtin_ia32_reduce_fmul_pd512:
- case X86::BI__builtin_ia32_reduce_fmul_ps512:
- case X86::BI__builtin_ia32_reduce_fmul_ph512:
- case X86::BI__builtin_ia32_reduce_fmul_ph256:
- case X86::BI__builtin_ia32_reduce_fmul_ph128:
- case X86::BI__builtin_ia32_reduce_fmax_pd512:
- case X86::BI__builtin_ia32_reduce_fmax_ps512:
- case X86::BI__builtin_ia32_reduce_fmax_ph512:
- case X86::BI__builtin_ia32_reduce_fmax_ph256:
- case X86::BI__builtin_ia32_reduce_fmax_ph128:
- case X86::BI__builtin_ia32_reduce_fmin_pd512:
- case X86::BI__builtin_ia32_reduce_fmin_ps512:
- case X86::BI__builtin_ia32_reduce_fmin_ph512:
- case X86::BI__builtin_ia32_reduce_fmin_ph256:
- case X86::BI__builtin_ia32_reduce_fmin_ph128:
- case X86::BI__builtin_ia32_rdrand16_step:
- case X86::BI__builtin_ia32_rdrand32_step:
- case X86::BI__builtin_ia32_rdrand64_step:
- case X86::BI__builtin_ia32_rdseed16_step:
- case X86::BI__builtin_ia32_rdseed32_step:
- case X86::BI__builtin_ia32_rdseed64_step:
- case X86::BI__builtin_ia32_addcarryx_u32:
- case X86::BI__builtin_ia32_addcarryx_u64:
- case X86::BI__builtin_ia32_subborrow_u32:
- case X86::BI__builtin_ia32_subborrow_u64:
- case X86::BI__builtin_ia32_fpclassps128_mask:
- case X86::BI__builtin_ia32_fpclassps256_mask:
- case X86::BI__builtin_ia32_fpclassps512_mask:
- case X86::BI__builtin_ia32_vfpclassbf16128_mask:
- case X86::BI__builtin_ia32_vfpclassbf16256_mask:
- case X86::BI__builtin_ia32_vfpclassbf16512_mask:
- case X86::BI__builtin_ia32_fpclassph128_mask:
- case X86::BI__builtin_ia32_fpclassph256_mask:
- case X86::BI__builtin_ia32_fpclassph512_mask:
- case X86::BI__builtin_ia32_fpclasspd128_mask:
- case X86::BI__builtin_ia32_fpclasspd256_mask:
- case X86::BI__builtin_ia32_fpclasspd512_mask:
- case X86::BI__builtin_ia32_vp2intersect_q_512:
- case X86::BI__builtin_ia32_vp2intersect_q_256:
- case X86::BI__builtin_ia32_vp2intersect_q_128:
- case X86::BI__builtin_ia32_vp2intersect_d_512:
- case X86::BI__builtin_ia32_vp2intersect_d_256:
- case X86::BI__builtin_ia32_vp2intersect_d_128:
- case X86::BI__builtin_ia32_vpmultishiftqb128:
- case X86::BI__builtin_ia32_vpmultishiftqb256:
- case X86::BI__builtin_ia32_vpmultishiftqb512:
- case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
- case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
- case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpeqpd:
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpltpd:
- case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpunordpd:
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpneqpd:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnltpd:
- return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
- cir::CmpOpKind::lt, /*shouldInvert=*/true);
- case X86::BI__builtin_ia32_cmpnleps:
- case X86::BI__builtin_ia32_cmpnlepd:
- return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
- cir::CmpOpKind::le, /*shouldInvert=*/true);
- case X86::BI__builtin_ia32_cmpordps:
- case X86::BI__builtin_ia32_cmpordpd:
- case X86::BI__builtin_ia32_cmpph128_mask:
- case X86::BI__builtin_ia32_cmpph256_mask:
- case X86::BI__builtin_ia32_cmpph512_mask:
- case X86::BI__builtin_ia32_cmpps128_mask:
- case X86::BI__builtin_ia32_cmpps256_mask:
- case X86::BI__builtin_ia32_cmpps512_mask:
- case X86::BI__builtin_ia32_cmppd128_mask:
- case X86::BI__builtin_ia32_cmppd256_mask:
- case X86::BI__builtin_ia32_cmppd512_mask:
- case X86::BI__builtin_ia32_vcmpbf16512_mask:
- case X86::BI__builtin_ia32_vcmpbf16256_mask:
- case X86::BI__builtin_ia32_vcmpbf16128_mask:
- case X86::BI__builtin_ia32_cmpps:
- case X86::BI__builtin_ia32_cmpps256:
- case X86::BI__builtin_ia32_cmppd:
- case X86::BI__builtin_ia32_cmppd256:
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpordss:
- case X86::BI__builtin_ia32_cmpeqsd:
- case X86::BI__builtin_ia32_cmpltsd:
- case X86::BI__builtin_ia32_cmplesd:
- case X86::BI__builtin_ia32_cmpunordsd:
- case X86::BI__builtin_ia32_cmpneqsd:
- case X86::BI__builtin_ia32_cmpnltsd:
- case X86::BI__builtin_ia32_cmpnlesd:
- case X86::BI__builtin_ia32_cmpordsd:
- case X86::BI__builtin_ia32_vcvtph2ps_mask:
- case X86::BI__builtin_ia32_vcvtph2ps256_mask:
- case X86::BI__builtin_ia32_vcvtph2ps512_mask:
- case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
- case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
- case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
- case X86::BI__cpuid:
- case X86::BI__cpuidex:
- case X86::BI__emul:
- case X86::BI__emulu:
- case X86::BI__mulh:
- case X86::BI__umulh:
- case X86::BI_mul128:
- case X86::BI_umul128:
- case X86::BI__faststorefence:
- case X86::BI__shiftleft128:
- case X86::BI__shiftright128:
- case X86::BI_ReadWriteBarrier:
- case X86::BI_ReadBarrier:
- case X86::BI_WriteBarrier:
- case X86::BI_AddressOfReturnAddress:
- case X86::BI__stosb:
- case X86::BI__ud2:
- case X86::BI__int2c:
- case X86::BI__readfsbyte:
- case X86::BI__readfsword:
- case X86::BI__readfsdword:
- case X86::BI__readfsqword:
- case X86::BI__readgsbyte:
- case X86::BI__readgsword:
- case X86::BI__readgsdword:
- case X86::BI__readgsqword:
- case X86::BI__builtin_ia32_encodekey128_u32:
- case X86::BI__builtin_ia32_encodekey256_u32:
- case X86::BI__builtin_ia32_aesenc128kl_u8:
- case X86::BI__builtin_ia32_aesdec128kl_u8:
- case X86::BI__builtin_ia32_aesenc256kl_u8:
- case X86::BI__builtin_ia32_aesdec256kl_u8:
- case X86::BI__builtin_ia32_aesencwide128kl_u8:
- case X86::BI__builtin_ia32_aesdecwide128kl_u8:
- case X86::BI__builtin_ia32_aesencwide256kl_u8:
- case X86::BI__builtin_ia32_aesdecwide256kl_u8:
- case X86::BI__builtin_ia32_vfcmaddcph512_mask:
- case X86::BI__builtin_ia32_vfmaddcph512_mask:
- case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
- case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
- case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
- case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
- case X86::BI__builtin_ia32_prefetchi:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
- }
-}
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit x86/x86_64 Builtin calls as CIR or a function
+// call to be later resolved.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CIRGenBuilder.h"
+#include "CIRGenFunction.h"
+#include "CIRGenModule.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/ValueRange.h"
+#include "clang/Basic/Builtins.h"
+#include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
+#include "clang/CIR/MissingFeatures.h"
+
+using namespace clang;
+using namespace clang::CIRGen;
+
+template <typename... Operands>
+static mlir::Value emitIntrinsicCallOp(CIRGenBuilderTy &builder,
+ mlir::Location loc, const StringRef str,
+ const mlir::Type &resTy,
+ Operands &&...op) {
+ return cir::LLVMIntrinsicCallOp::create(builder, loc,
+ builder.getStringAttr(str), resTy,
+ std::forward<Operands>(op)...)
+ .getResult();
+}
+
+// OG has unordered comparisons as a form of optimization in addition to
+// ordered comparisons, while CIR doesn't.
+//
+// This means that we can't encode a comparison code such as UGT (unordered
+// greater than), at least not at the CIR level.
+//
+// The boolean shouldInvert compensates for this.
+// For example: to get to the comparison code UGT, we pass in
+// emitVectorFCmp(OLE, shouldInvert = true), since OLE is the inverse of UGT.
+//
+// There are several other ways this could be supported:
+// - Register extra CmpOpKinds for the unordered comparison types and add the
+//   translation code to go from CIR to the LLVM dialect. Notice we get this
+//   naturally with shouldInvert, benefiting from existing infrastructure,
+//   albeit at the cost of an extra `not` at the CIR level.
+// - Add the extra comparison codes to a new VecCmpOpKind instead of
+//   cluttering CmpOpKind.
+// - Add a boolean to VecCmpOp indicating whether the comparison is ordered
+//   or unordered.
+// - Emit the intrinsic call directly instead of calling this helper; see how
+//   the LLVM lowering handles this.
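+//
+// Worked example (illustrative): cmpnleps lowers via
+// emitVectorFCmp(CmpOpKind::le, shouldInvert = true). For a lane with
+// a = 1.0 and b = 2.0, OLE is true and the `not` yields false, matching UGT;
+// for a NaN lane, OLE is false and the `not` yields true, matching the
+// unordered semantics of NLE.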
+static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
+ llvm::SmallVector<mlir::Value> &ops,
+ mlir::Location loc, cir::CmpOpKind pred,
+ bool shouldInvert) {
+ assert(!cir::MissingFeatures::cgFPOptionsRAII());
+ // TODO(cir): Add isSignaling boolean once emitConstrainedFPCall implemented
+ assert(!cir::MissingFeatures::emitConstrainedFPCall());
+ mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]);
+ mlir::Value bitCast = builder.createBitcast(
+ shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType());
+ return bitCast;
+}
+
+static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc,
+ mlir::Value mask, unsigned numElems) {
+ auto maskTy = cir::VectorType::get(
+ builder.getUIntNTy(1), cast<cir::IntType>(mask.getType()).getWidth());
+ mlir::Value maskVec = builder.createBitcast(mask, maskTy);
+
+  // If we have fewer than 8 elements, then the starting mask was an i8 and
+  // we need to extract down to the right number of elements.
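+  // (Illustrative: a 4-element mask arrives as an i8, is bitcast to a
+  // vector of 8 u1 elements, and is then shuffled with indices [0, 1, 2, 3]
+  // down to 4 elements.)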
+ if (numElems < 8) {
+ SmallVector<mlir::Attribute, 4> indices;
+ mlir::Type i32Ty = builder.getSInt32Ty();
+ for (auto i : llvm::seq<unsigned>(0, numElems))
+ indices.push_back(cir::IntAttr::get(i32Ty, i));
+
+ maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices);
+ }
+ return maskVec;
+}
+
+// Builds the VecShuffleOp for the pshuflw and pshufhw x86 builtins.
+//
+// The vector is split into lanes of 8 word (16-bit) elements. The lower or
+// upper half of each lane, selected by `isLow`, is shuffled as follows: the
+// immediate is truncated to 8 bits and split into four 2-bit fields, and the
+// value of the i-th field selects which element of the half-lane is placed
+// at position i. The other half of the lane remains unchanged.
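+//
+// Worked example (illustrative): pshuflw with imm = 0x1B on a 128-bit vector
+// (numElts = 8, isLow = true) decodes the 2-bit fields as 3, 2, 1, 0, giving
+// indices [3, 2, 1, 0, 4, 5, 6, 7]: the low four words are reversed and the
+// high four pass through unchanged.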
+static cir::VecShuffleOp emitPshufWord(CIRGenBuilderTy &builder,
+ const mlir::Value vec,
+ const mlir::Value immediate,
+ const mlir::Location loc,
+ const bool isLow) {
+ uint32_t imm = CIRGenFunction::getZExtIntValueFromConstOp(immediate);
+
+ auto vecTy = cast<cir::VectorType>(vec.getType());
+ unsigned numElts = vecTy.getSize();
+
+ unsigned firstHalfStart = isLow ? 0 : 4;
+ unsigned secondHalfStart = 4 - firstHalfStart;
+
+  // Splat the 8 bits of the immediate 4 times so the loop can wrap around.
+ imm = (imm & 0xff) * 0x01010101;
+
+ int64_t indices[32];
+ for (unsigned l = 0; l != numElts; l += 8) {
+ for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) {
+ indices[l + i] = l + (imm & 3) + firstHalfStart;
+ imm >>= 2;
+ }
+ for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i)
+ indices[l + i] = l + i;
+ }
+
+ return builder.createVecShuffle(loc, vec, ArrayRef(indices, numElts));
+}
+
+// Builds the shuffle mask for the pshufd/vpermil and shufps/shufpd x86
+// builtins. The shuffle mask is written to outIndices.
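+//
+// Worked example (illustrative): shufps with imm = 0x44 on 128-bit vectors
+// (numElts = 4, isShufP = true) decodes the fields as 0, 1, 0, 1; the upper
+// two positions get numElts added to select from the second vector, giving
+// [0, 1, 4, 5], i.e. [a0, a1, b0, b1].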
+static void
+computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec,
+ uint32_t imm, const bool isShufP,
+ llvm::SmallVectorImpl<int64_t> &outIndices) {
+ auto vecTy = cast<cir::VectorType>(vec.getType());
+ unsigned numElts = vecTy.getSize();
+ unsigned numLanes = cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128;
+ unsigned numLaneElts = numElts / numLanes;
+
+  // Splat the 8 bits of the immediate 4 times so the loop can wrap around.
+ imm = (imm & 0xff) * 0x01010101;
+
+ for (unsigned l = 0; l != numElts; l += numLaneElts) {
+ for (unsigned i = 0; i != numLaneElts; ++i) {
+ uint32_t idx = imm % numLaneElts;
+ imm /= numLaneElts;
+ if (isShufP && i >= (numLaneElts / 2))
+ idx += numElts;
+ outIndices[l + i] = l + idx;
+ }
+ }
+
+ outIndices.resize(numElts);
+}
+
+static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
+ mlir::Location loc,
+ const std::string &intrinsicName,
+ SmallVectorImpl<mlir::Value> &ops) {
+
+ auto intTy = cast<cir::IntType>(ops[0].getType());
+ unsigned numElts = intTy.getWidth();
+ mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
+ mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
+ mlir::Type vecTy = lhsVec.getType();
+ mlir::Value resVec = emitIntrinsicCallOp(builder, loc, intrinsicName, vecTy,
+ mlir::ValueRange{lhsVec, rhsVec});
+ return builder.createBitcast(resVec, ops[0].getType());
+}
+
+static mlir::Value emitX86MaskUnpack(CIRGenBuilderTy &builder,
+ mlir::Location loc,
+ const std::string &intrinsicName,
+ SmallVectorImpl<mlir::Value> &ops) {
+ unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
+
+ // Convert both operands to mask vectors.
+ mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElems);
+ mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElems);
+
+ mlir::Type i32Ty = builder.getSInt32Ty();
+
+ // Create indices for extracting the first half of each vector.
+ SmallVector<mlir::Attribute, 32> halfIndices;
+ for (auto i : llvm::seq<unsigned>(0, numElems / 2))
+ halfIndices.push_back(cir::IntAttr::get(i32Ty, i));
+
+ // Extract first half of each vector. This gives better codegen than
+ // doing it in a single shuffle.
+ mlir::Value lhsHalf = builder.createVecShuffle(loc, lhs, lhs, halfIndices);
+ mlir::Value rhsHalf = builder.createVecShuffle(loc, rhs, rhs, halfIndices);
+
+ // Create indices for concatenating the vectors.
+ // NOTE: Operands are swapped to match the intrinsic definition.
+ // After the half extraction, both vectors have numElems/2 elements.
+ // In createVecShuffle(rhsHalf, lhsHalf, indices), indices [0..numElems/2-1]
+ // select from rhsHalf, and indices [numElems/2..numElems-1] select from
+ // lhsHalf.
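+  // (Illustrative) for kunpckhi both operands are i16 masks (numElems = 16);
+  // read back as an i16 the result is ((lhs & 0xff) << 8) | (rhs & 0xff).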
+ SmallVector<mlir::Attribute, 64> concatIndices;
+ for (auto i : llvm::seq<unsigned>(0, numElems))
+ concatIndices.push_back(cir::IntAttr::get(i32Ty, i));
+
+ // Concat the vectors (RHS first, then LHS).
+ mlir::Value res =
+ builder.createVecShuffle(loc, rhsHalf, lhsHalf, concatIndices);
+ return builder.createBitcast(res, ops[0].getType());
+}
+
+static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
+ mlir::Location loc,
+ cir::BinOpKind binOpKind,
+ SmallVectorImpl<mlir::Value> &ops,
+ bool invertLHS = false) {
+ unsigned numElts = cast<cir::IntType>(ops[0].getType()).getWidth();
+ mlir::Value lhs = getMaskVecValue(builder, loc, ops[0], numElts);
+ mlir::Value rhs = getMaskVecValue(builder, loc, ops[1], numElts);
+
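+  // Inverting the LHS lets one binop cover the negated forms; e.g. the
+  // kxnor builtins pass invertLHS = true with BinOpKind::Xor, since
+  // (~lhs) ^ rhs == ~(lhs ^ rhs).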
+ if (invertLHS)
+ lhs = builder.createNot(lhs);
+ return builder.createBitcast(builder.createBinop(loc, lhs, binOpKind, rhs),
+ ops[0].getType());
+}
+
+static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc,
+ const std::string &intrinsicName,
+ SmallVectorImpl<mlir::Value> &ops) {
+ auto intTy = cast<cir::IntType>(ops[0].getType());
+ unsigned numElts = intTy.getWidth();
+ mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
+ mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
+ mlir::Type resTy = builder.getSInt32Ty();
+ return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy,
+ mlir::ValueRange{lhsVec, rhsVec});
+}
+
+static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
+ mlir::Value vec, mlir::Value value,
+ mlir::Value indexOp) {
+ unsigned numElts = cast<cir::VectorType>(vec.getType()).getSize();
+
+ uint64_t index =
+ indexOp.getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
+
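+  // Wrap the index modulo the vector length: e.g. (illustrative) index 5 on
+  // a 4-element vector selects element 1.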
+ index &= numElts - 1;
+
+ cir::ConstantOp indexVal = builder.getUInt64(index, loc);
+
+ return cir::VecInsertOp::create(builder, loc, vec, value, indexVal);
+}
+
+static mlir::Value emitX86FunnelShift(CIRGenBuilderTy &builder,
+ mlir::Location location, mlir::Value &op0,
+ mlir::Value &op1, mlir::Value &amt,
+ bool isRight) {
+ mlir::Type op0Ty = op0.getType();
+
+  // The amount may be a scalar immediate, in which case we create a splat
+  // vector. Funnel shift amounts are taken modulo the element width, and the
+  // widths are all powers of 2, so we only care about the lowest log2 bits
+  // anyway.
+ if (amt.getType() != op0Ty) {
+ auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
+ uint64_t numElems = vecTy.getSize();
+
+ auto amtTy = mlir::cast<cir::IntType>(amt.getType());
+ auto vecElemTy = mlir::cast<cir::IntType>(vecTy.getElementType());
+
+    // If signed, cast to the same width but unsigned first to
+    // ensure zero-extension when casting to a bigger unsigned `vecElemTy`.
+ if (amtTy.isSigned()) {
+ cir::IntType unsignedAmtTy = builder.getUIntNTy(amtTy.getWidth());
+ amt = builder.createIntCast(amt, unsignedAmtTy);
+ }
+ cir::IntType unsignedVecElemType = builder.getUIntNTy(vecElemTy.getWidth());
+ amt = builder.createIntCast(amt, unsignedVecElemType);
+ amt = cir::VecSplatOp::create(
+ builder, location, cir::VectorType::get(unsignedVecElemType, numElems),
+ amt);
+ }
+
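+  // Rotate callers pass the same value for op0 and op1; e.g. (illustrative)
+  // prold becomes fshl(x, x, splat(amt)), and a funnel shift with equal
+  // inputs is a rotate: fshl(0x80000001, 0x80000001, 1) == 0x00000003 in i32.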
+ const StringRef intrinsicName = isRight ? "fshr" : "fshl";
+ return emitIntrinsicCallOp(builder, location, intrinsicName, op0Ty,
+ mlir::ValueRange{op0, op1, amt});
+}
+
+static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
+ bool isSigned,
+ SmallVectorImpl<mlir::Value> &ops,
+ unsigned opTypePrimitiveSizeInBits) {
+ mlir::Type ty = cir::VectorType::get(builder.getSInt64Ty(),
+ opTypePrimitiveSizeInBits / 64);
+ mlir::Value lhs = builder.createBitcast(loc, ops[0], ty);
+ mlir::Value rhs = builder.createBitcast(loc, ops[1], ty);
+ if (isSigned) {
+ cir::ConstantOp shiftAmt =
+ builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32));
+ cir::VecSplatOp shiftSplatVecOp =
+ cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult());
+ mlir::Value shiftSplatValue = shiftSplatVecOp.getResult();
+ // In CIR, right-shift operations are automatically lowered to either an
+ // arithmetic or logical shift depending on the operand type. The purpose
+ // of the shifts here is to propagate the sign bit of the 32-bit input
+ // into the upper bits of each vector lane.
+ lhs = builder.createShift(loc, lhs, shiftSplatValue, true);
+ lhs = builder.createShift(loc, lhs, shiftSplatValue, false);
+ rhs = builder.createShift(loc, rhs, shiftSplatValue, true);
+ rhs = builder.createShift(loc, rhs, shiftSplatValue, false);
+ } else {
+ cir::ConstantOp maskScalar = builder.getConstant(
+ loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff));
+ cir::VecSplatOp mask =
+ cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult());
+ // Clear the upper bits
+ lhs = builder.createAnd(loc, lhs, mask);
+ rhs = builder.createAnd(loc, rhs, mask);
+ }
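+  // (Illustrative) for a lane whose low 32 bits are 0xffffffff, the signed
+  // path computes -1 * -1 == 1, while the unsigned path computes
+  // 4294967295 * 4294967295 == 0xfffffffe00000001.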
+ return builder.createMul(loc, lhs, rhs);
+}
+
+static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
+ llvm::SmallVector<mlir::Value> ops,
+ bool isSigned) {
+ mlir::Value op0 = ops[0];
+ mlir::Value op1 = ops[1];
+
+ cir::VectorType ty = cast<cir::VectorType>(op0.getType());
+ cir::IntType elementTy = cast<cir::IntType>(ty.getElementType());
+
+ uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7;
+
+ cir::CmpOpKind pred;
+ switch (imm) {
+ case 0x0:
+ pred = cir::CmpOpKind::lt;
+ break;
+ case 0x1:
+ pred = cir::CmpOpKind::le;
+ break;
+ case 0x2:
+ pred = cir::CmpOpKind::gt;
+ break;
+ case 0x3:
+ pred = cir::CmpOpKind::ge;
+ break;
+ case 0x4:
+ pred = cir::CmpOpKind::eq;
+ break;
+ case 0x5:
+ pred = cir::CmpOpKind::ne;
+ break;
+ case 0x6:
+ return builder.getNullValue(ty, loc); // FALSE
+ case 0x7: {
+ llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth());
+ return cir::VecSplatOp::create(
+ builder, loc, ty,
+ builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE
+ }
+ default:
+ llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
+ }
+
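+  // The builtin's signedness is given by isSigned, not by the operand types,
+  // so when they disagree (illustrative: vpcomub arriving with signed byte
+  // vectors) the operands are bitcast to elements of the matching signedness
+  // before comparing.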
+ if ((!isSigned && elementTy.isSigned()) ||
+ (isSigned && elementTy.isUnsigned())) {
+ elementTy = elementTy.isSigned() ? builder.getUIntNTy(elementTy.getWidth())
+ : builder.getSIntNTy(elementTy.getWidth());
+ ty = cir::VectorType::get(elementTy, ty.getSize());
+ op0 = builder.createBitcast(op0, ty);
+ op1 = builder.createBitcast(op1, ty);
+ }
+
+ return builder.createVecCompare(loc, pred, op0, op1);
+}
+
+mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
+ const CallExpr *expr) {
+ if (builtinID == Builtin::BI__builtin_cpu_is) {
+ cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
+ return {};
+ }
+ if (builtinID == Builtin::BI__builtin_cpu_supports) {
+ cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports");
+ return {};
+ }
+ if (builtinID == Builtin::BI__builtin_cpu_init) {
+ cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init");
+ return {};
+ }
+
+ // Handle MSVC intrinsics before argument evaluation to prevent double
+ // evaluation.
+ assert(!cir::MissingFeatures::msvcBuiltins());
+
+ // Find out if any arguments are required to be integer constant expressions.
+ assert(!cir::MissingFeatures::handleBuiltinICEArguments());
+
+  // The operands of the builtin call.
+ llvm::SmallVector<mlir::Value> ops;
+
+  // `iceArguments` is a bitmap whose i-th bit indicates whether the i-th
+  // argument is required to be an integer constant expression.
+ unsigned iceArguments = 0;
+ ASTContext::GetBuiltinTypeError error;
+ getContext().GetBuiltinType(builtinID, error, &iceArguments);
+ assert(error == ASTContext::GE_None && "Error while getting builtin type.");
+
+ for (auto [idx, arg] : llvm::enumerate(expr->arguments()))
+ ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg));
+
+ CIRGenBuilderTy &builder = getBuilder();
+ mlir::Type voidTy = builder.getVoidTy();
+
+ switch (builtinID) {
+ default:
+ return {};
+ case X86::BI_mm_clflush:
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ "x86.sse2.clflush", voidTy, ops[0]);
+ case X86::BI_mm_lfence:
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ "x86.sse2.lfence", voidTy);
+ case X86::BI_mm_pause:
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ "x86.sse2.pause", voidTy);
+ case X86::BI_mm_mfence:
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ "x86.sse2.mfence", voidTy);
+ case X86::BI_mm_sfence:
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ "x86.sse.sfence", voidTy);
+ case X86::BI_mm_prefetch:
+ case X86::BI__rdtsc:
+ case X86::BI__builtin_ia32_rdtscp: {
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ }
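+  // lzcnt/tzcnt are well-defined for a zero input (they return the operand
+  // width), so ctlz/cttz are emitted with isZeroPoison = false.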
+ case X86::BI__builtin_ia32_lzcnt_u16:
+ case X86::BI__builtin_ia32_lzcnt_u32:
+ case X86::BI__builtin_ia32_lzcnt_u64: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ mlir::Value isZeroPoison = builder.getFalse(loc);
+ return emitIntrinsicCallOp(builder, loc, "ctlz", ops[0].getType(),
+ mlir::ValueRange{ops[0], isZeroPoison});
+ }
+ case X86::BI__builtin_ia32_tzcnt_u16:
+ case X86::BI__builtin_ia32_tzcnt_u32:
+ case X86::BI__builtin_ia32_tzcnt_u64: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ mlir::Value isZeroPoison = builder.getFalse(loc);
+ return emitIntrinsicCallOp(builder, loc, "cttz", ops[0].getType(),
+ mlir::ValueRange{ops[0], isZeroPoison});
+ }
+ case X86::BI__builtin_ia32_undef128:
+ case X86::BI__builtin_ia32_undef256:
+ case X86::BI__builtin_ia32_undef512:
+ // The x86 definition of "undef" is not the same as the LLVM definition
+ // (PR32176). We leave optimizing away an unnecessary zero constant to the
+ // IR optimizer and backend.
+ // TODO: If we had a "freeze" IR instruction to generate a fixed undef
+ // value, we should use that here instead of a zero.
+ return builder.getNullValue(convertType(expr->getType()),
+ getLoc(expr->getExprLoc()));
+ case X86::BI__builtin_ia32_vec_ext_v4hi:
+ case X86::BI__builtin_ia32_vec_ext_v16qi:
+ case X86::BI__builtin_ia32_vec_ext_v8hi:
+ case X86::BI__builtin_ia32_vec_ext_v4si:
+ case X86::BI__builtin_ia32_vec_ext_v4sf:
+ case X86::BI__builtin_ia32_vec_ext_v2di:
+ case X86::BI__builtin_ia32_vec_ext_v32qi:
+ case X86::BI__builtin_ia32_vec_ext_v16hi:
+ case X86::BI__builtin_ia32_vec_ext_v8si:
+ case X86::BI__builtin_ia32_vec_ext_v4di: {
+ unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+
+ uint64_t index = getZExtIntValueFromConstOp(ops[1]);
+ index &= numElts - 1;
+
+ cir::ConstantOp indexVal =
+ builder.getUInt64(index, getLoc(expr->getExprLoc()));
+
+ // These builtins exist so we can ensure the index is an ICE and in range.
+ // Otherwise we could just do this in the header file.
+ return cir::VecExtractOp::create(builder, getLoc(expr->getExprLoc()),
+ ops[0], indexVal);
+ }
+ case X86::BI__builtin_ia32_vec_set_v4hi:
+ case X86::BI__builtin_ia32_vec_set_v16qi:
+ case X86::BI__builtin_ia32_vec_set_v8hi:
+ case X86::BI__builtin_ia32_vec_set_v4si:
+ case X86::BI__builtin_ia32_vec_set_v2di:
+ case X86::BI__builtin_ia32_vec_set_v32qi:
+ case X86::BI__builtin_ia32_vec_set_v16hi:
+ case X86::BI__builtin_ia32_vec_set_v8si:
+ case X86::BI__builtin_ia32_vec_set_v4di: {
+ return emitVecInsert(builder, getLoc(expr->getExprLoc()), ops[0], ops[1],
+ ops[2]);
+ }
+ case X86::BI__builtin_ia32_kunpckhi:
+ return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kunpackb", ops);
+ case X86::BI__builtin_ia32_kunpcksi:
+ return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kunpackw", ops);
+ case X86::BI__builtin_ia32_kunpckdi:
+ return emitX86MaskUnpack(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kunpackd", ops);
+ case X86::BI_mm_setcsr:
+ case X86::BI__builtin_ia32_ldmxcsr: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ Address tmp = createMemTemp(expr->getArg(0)->getType(), loc);
+ builder.createStore(loc, ops[0], tmp);
+ return emitIntrinsicCallOp(builder, loc, "x86.sse.ldmxcsr",
+ builder.getVoidTy(), tmp.getPointer());
+ }
+ case X86::BI_mm_getcsr:
+ case X86::BI__builtin_ia32_stmxcsr: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ Address tmp = createMemTemp(expr->getType(), loc);
+ emitIntrinsicCallOp(builder, loc, "x86.sse.stmxcsr", builder.getVoidTy(),
+ tmp.getPointer());
+ return builder.createLoad(loc, tmp);
+ }
+ case X86::BI__builtin_ia32_xsave:
+ case X86::BI__builtin_ia32_xsave64:
+ case X86::BI__builtin_ia32_xrstor:
+ case X86::BI__builtin_ia32_xrstor64:
+ case X86::BI__builtin_ia32_xsaveopt:
+ case X86::BI__builtin_ia32_xsaveopt64:
+ case X86::BI__builtin_ia32_xrstors:
+ case X86::BI__builtin_ia32_xrstors64:
+ case X86::BI__builtin_ia32_xsavec:
+ case X86::BI__builtin_ia32_xsavec64:
+ case X86::BI__builtin_ia32_xsaves:
+ case X86::BI__builtin_ia32_xsaves64:
+ case X86::BI__builtin_ia32_xsetbv:
+ case X86::BI_xsetbv:
+ case X86::BI__builtin_ia32_xgetbv:
+ case X86::BI_xgetbv:
+ case X86::BI__builtin_ia32_storedqudi128_mask:
+ case X86::BI__builtin_ia32_storedqusi128_mask:
+ case X86::BI__builtin_ia32_storedquhi128_mask:
+ case X86::BI__builtin_ia32_storedquqi128_mask:
+ case X86::BI__builtin_ia32_storeupd128_mask:
+ case X86::BI__builtin_ia32_storeups128_mask:
+ case X86::BI__builtin_ia32_storedqudi256_mask:
+ case X86::BI__builtin_ia32_storedqusi256_mask:
+ case X86::BI__builtin_ia32_storedquhi256_mask:
+ case X86::BI__builtin_ia32_storedquqi256_mask:
+ case X86::BI__builtin_ia32_storeupd256_mask:
+ case X86::BI__builtin_ia32_storeups256_mask:
+ case X86::BI__builtin_ia32_storedqudi512_mask:
+ case X86::BI__builtin_ia32_storedqusi512_mask:
+ case X86::BI__builtin_ia32_storedquhi512_mask:
+ case X86::BI__builtin_ia32_storedquqi512_mask:
+ case X86::BI__builtin_ia32_storeupd512_mask:
+ case X86::BI__builtin_ia32_storeups512_mask:
+ case X86::BI__builtin_ia32_storesbf16128_mask:
+ case X86::BI__builtin_ia32_storesh128_mask:
+ case X86::BI__builtin_ia32_storess128_mask:
+ case X86::BI__builtin_ia32_storesd128_mask:
+ case X86::BI__builtin_ia32_cvtmask2b128:
+ case X86::BI__builtin_ia32_cvtmask2b256:
+ case X86::BI__builtin_ia32_cvtmask2b512:
+ case X86::BI__builtin_ia32_cvtmask2w128:
+ case X86::BI__builtin_ia32_cvtmask2w256:
+ case X86::BI__builtin_ia32_cvtmask2w512:
+ case X86::BI__builtin_ia32_cvtmask2d128:
+ case X86::BI__builtin_ia32_cvtmask2d256:
+ case X86::BI__builtin_ia32_cvtmask2d512:
+ case X86::BI__builtin_ia32_cvtmask2q128:
+ case X86::BI__builtin_ia32_cvtmask2q256:
+ case X86::BI__builtin_ia32_cvtmask2q512:
+ case X86::BI__builtin_ia32_cvtb2mask128:
+ case X86::BI__builtin_ia32_cvtb2mask256:
+ case X86::BI__builtin_ia32_cvtb2mask512:
+ case X86::BI__builtin_ia32_cvtw2mask128:
+ case X86::BI__builtin_ia32_cvtw2mask256:
+ case X86::BI__builtin_ia32_cvtw2mask512:
+ case X86::BI__builtin_ia32_cvtd2mask128:
+ case X86::BI__builtin_ia32_cvtd2mask256:
+ case X86::BI__builtin_ia32_cvtd2mask512:
+ case X86::BI__builtin_ia32_cvtq2mask128:
+ case X86::BI__builtin_ia32_cvtq2mask256:
+ case X86::BI__builtin_ia32_cvtq2mask512:
+ case X86::BI__builtin_ia32_cvtdq2ps512_mask:
+ case X86::BI__builtin_ia32_cvtqq2ps512_mask:
+ case X86::BI__builtin_ia32_cvtqq2pd512_mask:
+ case X86::BI__builtin_ia32_vcvtw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
+ case X86::BI__builtin_ia32_cvtudq2ps512_mask:
+ case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
+ case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
+ case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
+ case X86::BI__builtin_ia32_vfmaddsh3_mask:
+ case X86::BI__builtin_ia32_vfmaddss3_mask:
+ case X86::BI__builtin_ia32_vfmaddsd3_mask:
+ case X86::BI__builtin_ia32_vfmaddsh3_maskz:
+ case X86::BI__builtin_ia32_vfmaddss3_maskz:
+ case X86::BI__builtin_ia32_vfmaddsd3_maskz:
+ case X86::BI__builtin_ia32_vfmaddsh3_mask3:
+ case X86::BI__builtin_ia32_vfmaddss3_mask3:
+ case X86::BI__builtin_ia32_vfmaddsd3_mask3:
+ case X86::BI__builtin_ia32_vfmsubsh3_mask3:
+ case X86::BI__builtin_ia32_vfmsubss3_mask3:
+ case X86::BI__builtin_ia32_vfmsubsd3_mask3:
+ case X86::BI__builtin_ia32_vfmaddph512_mask:
+ case X86::BI__builtin_ia32_vfmaddph512_maskz:
+ case X86::BI__builtin_ia32_vfmaddph512_mask3:
+ case X86::BI__builtin_ia32_vfmaddps512_mask:
+ case X86::BI__builtin_ia32_vfmaddps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubph512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubph512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+ case X86::BI__builtin_ia32_movdqa32store128_mask:
+ case X86::BI__builtin_ia32_movdqa64store128_mask:
+ case X86::BI__builtin_ia32_storeaps128_mask:
+ case X86::BI__builtin_ia32_storeapd128_mask:
+ case X86::BI__builtin_ia32_movdqa32store256_mask:
+ case X86::BI__builtin_ia32_movdqa64store256_mask:
+ case X86::BI__builtin_ia32_storeaps256_mask:
+ case X86::BI__builtin_ia32_storeapd256_mask:
+ case X86::BI__builtin_ia32_movdqa32store512_mask:
+ case X86::BI__builtin_ia32_movdqa64store512_mask:
+ case X86::BI__builtin_ia32_storeaps512_mask:
+ case X86::BI__builtin_ia32_storeapd512_mask:
+ case X86::BI__builtin_ia32_loadups128_mask:
+ case X86::BI__builtin_ia32_loadups256_mask:
+ case X86::BI__builtin_ia32_loadups512_mask:
+ case X86::BI__builtin_ia32_loadupd128_mask:
+ case X86::BI__builtin_ia32_loadupd256_mask:
+ case X86::BI__builtin_ia32_loadupd512_mask:
+ case X86::BI__builtin_ia32_loaddquqi128_mask:
+ case X86::BI__builtin_ia32_loaddquqi256_mask:
+ case X86::BI__builtin_ia32_loaddquqi512_mask:
+ case X86::BI__builtin_ia32_loaddquhi128_mask:
+ case X86::BI__builtin_ia32_loaddquhi256_mask:
+ case X86::BI__builtin_ia32_loaddquhi512_mask:
+ case X86::BI__builtin_ia32_loaddqusi128_mask:
+ case X86::BI__builtin_ia32_loaddqusi256_mask:
+ case X86::BI__builtin_ia32_loaddqusi512_mask:
+ case X86::BI__builtin_ia32_loaddqudi128_mask:
+ case X86::BI__builtin_ia32_loaddqudi256_mask:
+ case X86::BI__builtin_ia32_loaddqudi512_mask:
+ case X86::BI__builtin_ia32_loadsbf16128_mask:
+ case X86::BI__builtin_ia32_loadsh128_mask:
+ case X86::BI__builtin_ia32_loadss128_mask:
+ case X86::BI__builtin_ia32_loadsd128_mask:
+ case X86::BI__builtin_ia32_loadaps128_mask:
+ case X86::BI__builtin_ia32_loadaps256_mask:
+ case X86::BI__builtin_ia32_loadaps512_mask:
+ case X86::BI__builtin_ia32_loadapd128_mask:
+ case X86::BI__builtin_ia32_loadapd256_mask:
+ case X86::BI__builtin_ia32_loadapd512_mask:
+ case X86::BI__builtin_ia32_movdqa32load128_mask:
+ case X86::BI__builtin_ia32_movdqa32load256_mask:
+ case X86::BI__builtin_ia32_movdqa32load512_mask:
+ case X86::BI__builtin_ia32_movdqa64load128_mask:
+ case X86::BI__builtin_ia32_movdqa64load256_mask:
+ case X86::BI__builtin_ia32_movdqa64load512_mask:
+ case X86::BI__builtin_ia32_expandloaddf128_mask:
+ case X86::BI__builtin_ia32_expandloaddf256_mask:
+ case X86::BI__builtin_ia32_expandloaddf512_mask:
+ case X86::BI__builtin_ia32_expandloadsf128_mask:
+ case X86::BI__builtin_ia32_expandloadsf256_mask:
+ case X86::BI__builtin_ia32_expandloadsf512_mask:
+ case X86::BI__builtin_ia32_expandloaddi128_mask:
+ case X86::BI__builtin_ia32_expandloaddi256_mask:
+ case X86::BI__builtin_ia32_expandloaddi512_mask:
+ case X86::BI__builtin_ia32_expandloadsi128_mask:
+ case X86::BI__builtin_ia32_expandloadsi256_mask:
+ case X86::BI__builtin_ia32_expandloadsi512_mask:
+ case X86::BI__builtin_ia32_expandloadhi128_mask:
+ case X86::BI__builtin_ia32_expandloadhi256_mask:
+ case X86::BI__builtin_ia32_expandloadhi512_mask:
+ case X86::BI__builtin_ia32_expandloadqi128_mask:
+ case X86::BI__builtin_ia32_expandloadqi256_mask:
+ case X86::BI__builtin_ia32_expandloadqi512_mask:
+ case X86::BI__builtin_ia32_compressstoredf128_mask:
+ case X86::BI__builtin_ia32_compressstoredf256_mask:
+ case X86::BI__builtin_ia32_compressstoredf512_mask:
+ case X86::BI__builtin_ia32_compressstoresf128_mask:
+ case X86::BI__builtin_ia32_compressstoresf256_mask:
+ case X86::BI__builtin_ia32_compressstoresf512_mask:
+ case X86::BI__builtin_ia32_compressstoredi128_mask:
+ case X86::BI__builtin_ia32_compressstoredi256_mask:
+ case X86::BI__builtin_ia32_compressstoredi512_mask:
+ case X86::BI__builtin_ia32_compressstoresi128_mask:
+ case X86::BI__builtin_ia32_compressstoresi256_mask:
+ case X86::BI__builtin_ia32_compressstoresi512_mask:
+ case X86::BI__builtin_ia32_compressstorehi128_mask:
+ case X86::BI__builtin_ia32_compressstorehi256_mask:
+ case X86::BI__builtin_ia32_compressstorehi512_mask:
+ case X86::BI__builtin_ia32_compressstoreqi128_mask:
+ case X86::BI__builtin_ia32_compressstoreqi256_mask:
+ case X86::BI__builtin_ia32_compressstoreqi512_mask:
+ case X86::BI__builtin_ia32_expanddf128_mask:
+ case X86::BI__builtin_ia32_expanddf256_mask:
+ case X86::BI__builtin_ia32_expanddf512_mask:
+ case X86::BI__builtin_ia32_expandsf128_mask:
+ case X86::BI__builtin_ia32_expandsf256_mask:
+ case X86::BI__builtin_ia32_expandsf512_mask:
+ case X86::BI__builtin_ia32_expanddi128_mask:
+ case X86::BI__builtin_ia32_expanddi256_mask:
+ case X86::BI__builtin_ia32_expanddi512_mask:
+ case X86::BI__builtin_ia32_expandsi128_mask:
+ case X86::BI__builtin_ia32_expandsi256_mask:
+ case X86::BI__builtin_ia32_expandsi512_mask:
+ case X86::BI__builtin_ia32_expandhi128_mask:
+ case X86::BI__builtin_ia32_expandhi256_mask:
+ case X86::BI__builtin_ia32_expandhi512_mask:
+ case X86::BI__builtin_ia32_expandqi128_mask:
+ case X86::BI__builtin_ia32_expandqi256_mask:
+ case X86::BI__builtin_ia32_expandqi512_mask:
+ case X86::BI__builtin_ia32_compressdf128_mask:
+ case X86::BI__builtin_ia32_compressdf256_mask:
+ case X86::BI__builtin_ia32_compressdf512_mask:
+ case X86::BI__builtin_ia32_compresssf128_mask:
+ case X86::BI__builtin_ia32_compresssf256_mask:
+ case X86::BI__builtin_ia32_compresssf512_mask:
+ case X86::BI__builtin_ia32_compressdi128_mask:
+ case X86::BI__builtin_ia32_compressdi256_mask:
+ case X86::BI__builtin_ia32_compressdi512_mask:
+ case X86::BI__builtin_ia32_compresssi128_mask:
+ case X86::BI__builtin_ia32_compresssi256_mask:
+ case X86::BI__builtin_ia32_compresssi512_mask:
+ case X86::BI__builtin_ia32_compresshi128_mask:
+ case X86::BI__builtin_ia32_compresshi256_mask:
+ case X86::BI__builtin_ia32_compresshi512_mask:
+ case X86::BI__builtin_ia32_compressqi128_mask:
+ case X86::BI__builtin_ia32_compressqi256_mask:
+ case X86::BI__builtin_ia32_compressqi512_mask:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ case X86::BI__builtin_ia32_gather3div2df:
+ case X86::BI__builtin_ia32_gather3div2di:
+ case X86::BI__builtin_ia32_gather3div4df:
+ case X86::BI__builtin_ia32_gather3div4di:
+ case X86::BI__builtin_ia32_gather3div4sf:
+ case X86::BI__builtin_ia32_gather3div4si:
+ case X86::BI__builtin_ia32_gather3div8sf:
+ case X86::BI__builtin_ia32_gather3div8si:
+ case X86::BI__builtin_ia32_gather3siv2df:
+ case X86::BI__builtin_ia32_gather3siv2di:
+ case X86::BI__builtin_ia32_gather3siv4df:
+ case X86::BI__builtin_ia32_gather3siv4di:
+ case X86::BI__builtin_ia32_gather3siv4sf:
+ case X86::BI__builtin_ia32_gather3siv4si:
+ case X86::BI__builtin_ia32_gather3siv8sf:
+ case X86::BI__builtin_ia32_gather3siv8si:
+ case X86::BI__builtin_ia32_gathersiv8df:
+ case X86::BI__builtin_ia32_gathersiv16sf:
+ case X86::BI__builtin_ia32_gatherdiv8df:
+ case X86::BI__builtin_ia32_gatherdiv16sf:
+ case X86::BI__builtin_ia32_gathersiv8di:
+ case X86::BI__builtin_ia32_gathersiv16si:
+ case X86::BI__builtin_ia32_gatherdiv8di:
+ case X86::BI__builtin_ia32_gatherdiv16si: {
+ StringRef intrinsicName;
+ switch (builtinID) {
+ default:
+ llvm_unreachable("Unexpected builtin");
+ case X86::BI__builtin_ia32_gather3div2df:
+ intrinsicName = "x86.avx512.mask.gather3div2.df";
+ break;
+ case X86::BI__builtin_ia32_gather3div2di:
+ intrinsicName = "x86.avx512.mask.gather3div2.di";
+ break;
+ case X86::BI__builtin_ia32_gather3div4df:
+ intrinsicName = "x86.avx512.mask.gather3div4.df";
+ break;
+ case X86::BI__builtin_ia32_gather3div4di:
+ intrinsicName = "x86.avx512.mask.gather3div4.di";
+ break;
+ case X86::BI__builtin_ia32_gather3div4sf:
+ intrinsicName = "x86.avx512.mask.gather3div4.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3div4si:
+ intrinsicName = "x86.avx512.mask.gather3div4.si";
+ break;
+ case X86::BI__builtin_ia32_gather3div8sf:
+ intrinsicName = "x86.avx512.mask.gather3div8.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3div8si:
+ intrinsicName = "x86.avx512.mask.gather3div8.si";
+ break;
+ case X86::BI__builtin_ia32_gather3siv2df:
+ intrinsicName = "x86.avx512.mask.gather3siv2.df";
+ break;
+ case X86::BI__builtin_ia32_gather3siv2di:
+ intrinsicName = "x86.avx512.mask.gather3siv2.di";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4df:
+ intrinsicName = "x86.avx512.mask.gather3siv4.df";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4di:
+ intrinsicName = "x86.avx512.mask.gather3siv4.di";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4sf:
+ intrinsicName = "x86.avx512.mask.gather3siv4.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4si:
+ intrinsicName = "x86.avx512.mask.gather3siv4.si";
+ break;
+ case X86::BI__builtin_ia32_gather3siv8sf:
+ intrinsicName = "x86.avx512.mask.gather3siv8.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3siv8si:
+ intrinsicName = "x86.avx512.mask.gather3siv8.si";
+ break;
+ case X86::BI__builtin_ia32_gathersiv8df:
+ intrinsicName = "x86.avx512.mask.gather.dpd.512";
+ break;
+ case X86::BI__builtin_ia32_gathersiv16sf:
+ intrinsicName = "x86.avx512.mask.gather.dps.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv8df:
+ intrinsicName = "x86.avx512.mask.gather.qpd.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv16sf:
+ intrinsicName = "x86.avx512.mask.gather.qps.512";
+ break;
+ case X86::BI__builtin_ia32_gathersiv8di:
+ intrinsicName = "x86.avx512.mask.gather.dpq.512";
+ break;
+ case X86::BI__builtin_ia32_gathersiv16si:
+ intrinsicName = "x86.avx512.mask.gather.dpi.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv8di:
+ intrinsicName = "x86.avx512.mask.gather.qpq.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv16si:
+ intrinsicName = "x86.avx512.mask.gather.qpi.512";
+ break;
+ }
+
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ unsigned minElts =
+ std::min(cast<cir::VectorType>(ops[0].getType()).getSize(),
+ cast<cir::VectorType>(ops[2].getType()).getSize());
+ ops[3] = getMaskVecValue(builder, loc, ops[3], minElts);
+ return emitIntrinsicCallOp(builder, loc, intrinsicName,
+ convertType(expr->getType()), ops);
+ }
+ case X86::BI__builtin_ia32_scattersiv8df:
+ case X86::BI__builtin_ia32_scattersiv16sf:
+ case X86::BI__builtin_ia32_scatterdiv8df:
+ case X86::BI__builtin_ia32_scatterdiv16sf:
+ case X86::BI__builtin_ia32_scattersiv8di:
+ case X86::BI__builtin_ia32_scattersiv16si:
+ case X86::BI__builtin_ia32_scatterdiv8di:
+ case X86::BI__builtin_ia32_scatterdiv16si:
+ case X86::BI__builtin_ia32_scatterdiv2df:
+ case X86::BI__builtin_ia32_scatterdiv2di:
+ case X86::BI__builtin_ia32_scatterdiv4df:
+ case X86::BI__builtin_ia32_scatterdiv4di:
+ case X86::BI__builtin_ia32_scatterdiv4sf:
+ case X86::BI__builtin_ia32_scatterdiv4si:
+ case X86::BI__builtin_ia32_scatterdiv8sf:
+ case X86::BI__builtin_ia32_scatterdiv8si:
+ case X86::BI__builtin_ia32_scattersiv2df:
+ case X86::BI__builtin_ia32_scattersiv2di:
+ case X86::BI__builtin_ia32_scattersiv4df:
+ case X86::BI__builtin_ia32_scattersiv4di:
+ case X86::BI__builtin_ia32_scattersiv4sf:
+ case X86::BI__builtin_ia32_scattersiv4si:
+ case X86::BI__builtin_ia32_scattersiv8sf:
+ case X86::BI__builtin_ia32_scattersiv8si: {
+ llvm::StringRef intrinsicName;
+ switch (builtinID) {
+ default:
+ llvm_unreachable("Unexpected builtin");
+ case X86::BI__builtin_ia32_scattersiv8df:
+ intrinsicName = "x86.avx512.mask.scatter.dpd.512";
+ break;
+ case X86::BI__builtin_ia32_scattersiv16sf:
+ intrinsicName = "x86.avx512.mask.scatter.dps.512";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv8df:
+ intrinsicName = "x86.avx512.mask.scatter.qpd.512";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv16sf:
+ intrinsicName = "x86.avx512.mask.scatter.qps.512";
+ break;
+ case X86::BI__builtin_ia32_scattersiv8di:
+ intrinsicName = "x86.avx512.mask.scatter.dpq.512";
+ break;
+ case X86::BI__builtin_ia32_scattersiv16si:
+ intrinsicName = "x86.avx512.mask.scatter.dpi.512";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv8di:
+ intrinsicName = "x86.avx512.mask.scatter.qpq.512";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv16si:
+ intrinsicName = "x86.avx512.mask.scatter.qpi.512";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv2df:
+ intrinsicName = "x86.avx512.mask.scatterdiv2.df";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv2di:
+ intrinsicName = "x86.avx512.mask.scatterdiv2.di";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv4df:
+ intrinsicName = "x86.avx512.mask.scatterdiv4.df";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv4di:
+ intrinsicName = "x86.avx512.mask.scatterdiv4.di";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv4sf:
+ intrinsicName = "x86.avx512.mask.scatterdiv4.sf";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv4si:
+ intrinsicName = "x86.avx512.mask.scatterdiv4.si";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv8sf:
+ intrinsicName = "x86.avx512.mask.scatterdiv8.sf";
+ break;
+ case X86::BI__builtin_ia32_scatterdiv8si:
+ intrinsicName = "x86.avx512.mask.scatterdiv8.si";
+ break;
+ case X86::BI__builtin_ia32_scattersiv2df:
+ intrinsicName = "x86.avx512.mask.scattersiv2.df";
+ break;
+ case X86::BI__builtin_ia32_scattersiv2di:
+ intrinsicName = "x86.avx512.mask.scattersiv2.di";
+ break;
+ case X86::BI__builtin_ia32_scattersiv4df:
+ intrinsicName = "x86.avx512.mask.scattersiv4.df";
+ break;
+ case X86::BI__builtin_ia32_scattersiv4di:
+ intrinsicName = "x86.avx512.mask.scattersiv4.di";
+ break;
+ case X86::BI__builtin_ia32_scattersiv4sf:
+ intrinsicName = "x86.avx512.mask.scattersiv4.sf";
+ break;
+ case X86::BI__builtin_ia32_scattersiv4si:
+ intrinsicName = "x86.avx512.mask.scattersiv4.si";
+ break;
+ case X86::BI__builtin_ia32_scattersiv8sf:
+ intrinsicName = "x86.avx512.mask.scattersiv8.sf";
+ break;
+ case X86::BI__builtin_ia32_scattersiv8si:
+ intrinsicName = "x86.avx512.mask.scattersiv8.si";
+ break;
+ }
+
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ unsigned minElts =
+ std::min(cast<cir::VectorType>(ops[2].getType()).getSize(),
+ cast<cir::VectorType>(ops[3].getType()).getSize());
+ ops[1] = getMaskVecValue(builder, loc, ops[1], minElts);
+
+ return emitIntrinsicCallOp(builder, loc, intrinsicName,
+ convertType(expr->getType()), ops);
+ }
+ case X86::BI__builtin_ia32_vextractf128_pd256:
+ case X86::BI__builtin_ia32_vextractf128_ps256:
+ case X86::BI__builtin_ia32_vextractf128_si256:
+ case X86::BI__builtin_ia32_extract128i256:
+ case X86::BI__builtin_ia32_extractf64x4_mask:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extracti64x4_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extractf32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask:
+ case X86::BI__builtin_ia32_vinsertf128_pd256:
+ case X86::BI__builtin_ia32_vinsertf128_ps256:
+ case X86::BI__builtin_ia32_vinsertf128_si256:
+ case X86::BI__builtin_ia32_insert128i256:
+ case X86::BI__builtin_ia32_insertf64x4:
+ case X86::BI__builtin_ia32_insertf32x4:
+ case X86::BI__builtin_ia32_inserti64x4:
+ case X86::BI__builtin_ia32_inserti32x4:
+ case X86::BI__builtin_ia32_insertf32x8:
+ case X86::BI__builtin_ia32_inserti32x8:
+ case X86::BI__builtin_ia32_insertf32x4_256:
+ case X86::BI__builtin_ia32_inserti32x4_256:
+ case X86::BI__builtin_ia32_insertf64x2_256:
+ case X86::BI__builtin_ia32_inserti64x2_256:
+ case X86::BI__builtin_ia32_insertf64x2_512:
+ case X86::BI__builtin_ia32_inserti64x2_512:
+ case X86::BI__builtin_ia32_pmovqd512_mask:
+ case X86::BI__builtin_ia32_pmovwb512_mask:
+ case X86::BI__builtin_ia32_pblendw128:
+ case X86::BI__builtin_ia32_blendpd:
+ case X86::BI__builtin_ia32_blendps:
+ case X86::BI__builtin_ia32_blendpd256:
+ case X86::BI__builtin_ia32_blendps256:
+ case X86::BI__builtin_ia32_pblendw256:
+ case X86::BI__builtin_ia32_pblendd128:
+ case X86::BI__builtin_ia32_pblendd256:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ case X86::BI__builtin_ia32_pshuflw:
+ case X86::BI__builtin_ia32_pshuflw256:
+ case X86::BI__builtin_ia32_pshuflw512:
+ return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
+ true);
+ case X86::BI__builtin_ia32_pshufhw:
+ case X86::BI__builtin_ia32_pshufhw256:
+ case X86::BI__builtin_ia32_pshufhw512:
+ return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
+ false);
+ case X86::BI__builtin_ia32_pshufd:
+ case X86::BI__builtin_ia32_pshufd256:
+ case X86::BI__builtin_ia32_pshufd512:
+ case X86::BI__builtin_ia32_vpermilpd:
+ case X86::BI__builtin_ia32_vpermilps:
+ case X86::BI__builtin_ia32_vpermilpd256:
+ case X86::BI__builtin_ia32_vpermilps256:
+ case X86::BI__builtin_ia32_vpermilpd512:
+ case X86::BI__builtin_ia32_vpermilps512: {
+ const uint32_t imm = getSExtIntValueFromConstOp(ops[1]);
+
+ llvm::SmallVector<int64_t, 16> mask(16);
+ computeFullLaneShuffleMask(*this, ops[0], imm, false, mask);
+
+ return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], mask);
+ }
+ case X86::BI__builtin_ia32_shufpd:
+ case X86::BI__builtin_ia32_shufpd256:
+ case X86::BI__builtin_ia32_shufpd512:
+ case X86::BI__builtin_ia32_shufps:
+ case X86::BI__builtin_ia32_shufps256:
+ case X86::BI__builtin_ia32_shufps512: {
+ const uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
+
+ llvm::SmallVector<int64_t, 16> mask(16);
+ computeFullLaneShuffleMask(*this, ops[0], imm, true, mask);
+
+ return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
+ mask);
+ }
+ case X86::BI__builtin_ia32_permdi256:
+ case X86::BI__builtin_ia32_permdf256:
+ case X86::BI__builtin_ia32_permdi512:
+ case X86::BI__builtin_ia32_permdf512:
+ case X86::BI__builtin_ia32_palignr128:
+ case X86::BI__builtin_ia32_palignr256:
+ case X86::BI__builtin_ia32_palignr512:
+ case X86::BI__builtin_ia32_alignd128:
+ case X86::BI__builtin_ia32_alignd256:
+ case X86::BI__builtin_ia32_alignd512:
+ case X86::BI__builtin_ia32_alignq128:
+ case X86::BI__builtin_ia32_alignq256:
+ case X86::BI__builtin_ia32_alignq512:
+ case X86::BI__builtin_ia32_shuf_f32x4_256:
+ case X86::BI__builtin_ia32_shuf_f64x2_256:
+ case X86::BI__builtin_ia32_shuf_i32x4_256:
+ case X86::BI__builtin_ia32_shuf_i64x2_256:
+ case X86::BI__builtin_ia32_shuf_f32x4:
+ case X86::BI__builtin_ia32_shuf_f64x2:
+ case X86::BI__builtin_ia32_shuf_i32x4:
+ case X86::BI__builtin_ia32_shuf_i64x2:
+ case X86::BI__builtin_ia32_vperm2f128_pd256:
+ case X86::BI__builtin_ia32_vperm2f128_ps256:
+ case X86::BI__builtin_ia32_vperm2f128_si256:
+ case X86::BI__builtin_ia32_permti256:
+ case X86::BI__builtin_ia32_pslldqi128_byteshift:
+ case X86::BI__builtin_ia32_pslldqi256_byteshift:
+ case X86::BI__builtin_ia32_pslldqi512_byteshift:
+ case X86::BI__builtin_ia32_psrldqi128_byteshift:
+ case X86::BI__builtin_ia32_psrldqi256_byteshift:
+ case X86::BI__builtin_ia32_psrldqi512_byteshift:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ case X86::BI__builtin_ia32_kshiftliqi:
+ case X86::BI__builtin_ia32_kshiftlihi:
+ case X86::BI__builtin_ia32_kshiftlisi:
+ case X86::BI__builtin_ia32_kshiftlidi: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ unsigned shiftVal =
+ ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
+ 0xff;
+ unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
+
+ if (shiftVal >= numElems)
+ return builder.getNullValue(ops[0].getType(), loc);
+
+ mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
+
+ SmallVector<mlir::Attribute, 64> indices;
+ mlir::Type i32Ty = builder.getSInt32Ty();
+ for (auto i : llvm::seq<unsigned>(0, numElems))
+ indices.push_back(cir::IntAttr::get(i32Ty, numElems + i - shiftVal));
+
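+    // For example (illustrative): with numElems = 8 and shiftVal = 2, the
+    // indices are [6..13] into concat(zero, in), so the two low lanes read
+    // zeros and lane i >= 2 reads in[i - 2]: a left shift with zero fill.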
+ mlir::Value zero = builder.getNullValue(in.getType(), loc);
+ mlir::Value sv = builder.createVecShuffle(loc, zero, in, indices);
+ return builder.createBitcast(sv, ops[0].getType());
+ }
+ case X86::BI__builtin_ia32_kshiftriqi:
+ case X86::BI__builtin_ia32_kshiftrihi:
+ case X86::BI__builtin_ia32_kshiftrisi:
+ case X86::BI__builtin_ia32_kshiftridi: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ unsigned shiftVal =
+ ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
+ 0xff;
+ unsigned numElems = cast<cir::IntType>(ops[0].getType()).getWidth();
+
+ if (shiftVal >= numElems)
+ return builder.getNullValue(ops[0].getType(), loc);
+
+ mlir::Value in = getMaskVecValue(builder, loc, ops[0], numElems);
+
+ SmallVector<mlir::Attribute, 64> indices;
+ mlir::Type i32Ty = builder.getSInt32Ty();
+ for (auto i : llvm::seq<unsigned>(0, numElems))
+ indices.push_back(cir::IntAttr::get(i32Ty, i + shiftVal));
+
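+    // For example (illustrative): with numElems = 8 and shiftVal = 2, the
+    // indices are [2..9] into concat(in, zero), so lane i reads in[i + 2]
+    // and the two high lanes read zeros: a right shift with zero fill.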
+ mlir::Value zero = builder.getNullValue(in.getType(), loc);
+ mlir::Value sv = builder.createVecShuffle(loc, in, zero, indices);
+ return builder.createBitcast(sv, ops[0].getType());
+ }
+ case X86::BI__builtin_ia32_vprotbi:
+ case X86::BI__builtin_ia32_vprotwi:
+ case X86::BI__builtin_ia32_vprotdi:
+ case X86::BI__builtin_ia32_vprotqi:
+ case X86::BI__builtin_ia32_prold128:
+ case X86::BI__builtin_ia32_prold256:
+ case X86::BI__builtin_ia32_prold512:
+ case X86::BI__builtin_ia32_prolq128:
+ case X86::BI__builtin_ia32_prolq256:
+ case X86::BI__builtin_ia32_prolq512:
+ return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
+ ops[0], ops[1], false);
+ case X86::BI__builtin_ia32_prord128:
+ case X86::BI__builtin_ia32_prord256:
+ case X86::BI__builtin_ia32_prord512:
+ case X86::BI__builtin_ia32_prorq128:
+ case X86::BI__builtin_ia32_prorq256:
+ case X86::BI__builtin_ia32_prorq512:
+ return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
+ ops[0], ops[1], true);
+ case X86::BI__builtin_ia32_selectb_128:
+ case X86::BI__builtin_ia32_selectb_256:
+ case X86::BI__builtin_ia32_selectb_512:
+ case X86::BI__builtin_ia32_selectw_128:
+ case X86::BI__builtin_ia32_selectw_256:
+ case X86::BI__builtin_ia32_selectw_512:
+ case X86::BI__builtin_ia32_selectd_128:
+ case X86::BI__builtin_ia32_selectd_256:
+ case X86::BI__builtin_ia32_selectd_512:
+ case X86::BI__builtin_ia32_selectq_128:
+ case X86::BI__builtin_ia32_selectq_256:
+ case X86::BI__builtin_ia32_selectq_512:
+ case X86::BI__builtin_ia32_selectph_128:
+ case X86::BI__builtin_ia32_selectph_256:
+ case X86::BI__builtin_ia32_selectph_512:
+ case X86::BI__builtin_ia32_selectpbf_128:
+ case X86::BI__builtin_ia32_selectpbf_256:
+ case X86::BI__builtin_ia32_selectpbf_512:
+ case X86::BI__builtin_ia32_selectps_128:
+ case X86::BI__builtin_ia32_selectps_256:
+ case X86::BI__builtin_ia32_selectps_512:
+ case X86::BI__builtin_ia32_selectpd_128:
+ case X86::BI__builtin_ia32_selectpd_256:
+ case X86::BI__builtin_ia32_selectpd_512:
+ case X86::BI__builtin_ia32_selectsh_128:
+ case X86::BI__builtin_ia32_selectsbf_128:
+ case X86::BI__builtin_ia32_selectss_128:
+ case X86::BI__builtin_ia32_selectsd_128:
+ case X86::BI__builtin_ia32_cmpb128_mask:
+ case X86::BI__builtin_ia32_cmpb256_mask:
+ case X86::BI__builtin_ia32_cmpb512_mask:
+ case X86::BI__builtin_ia32_cmpw128_mask:
+ case X86::BI__builtin_ia32_cmpw256_mask:
+ case X86::BI__builtin_ia32_cmpw512_mask:
+ case X86::BI__builtin_ia32_cmpd128_mask:
+ case X86::BI__builtin_ia32_cmpd256_mask:
+ case X86::BI__builtin_ia32_cmpd512_mask:
+ case X86::BI__builtin_ia32_cmpq128_mask:
+ case X86::BI__builtin_ia32_cmpq256_mask:
+ case X86::BI__builtin_ia32_cmpq512_mask:
+ case X86::BI__builtin_ia32_ucmpb128_mask:
+ case X86::BI__builtin_ia32_ucmpb256_mask:
+ case X86::BI__builtin_ia32_ucmpb512_mask:
+ case X86::BI__builtin_ia32_ucmpw128_mask:
+ case X86::BI__builtin_ia32_ucmpw256_mask:
+ case X86::BI__builtin_ia32_ucmpw512_mask:
+ case X86::BI__builtin_ia32_ucmpd128_mask:
+ case X86::BI__builtin_ia32_ucmpd256_mask:
+ case X86::BI__builtin_ia32_ucmpd512_mask:
+ case X86::BI__builtin_ia32_ucmpq128_mask:
+ case X86::BI__builtin_ia32_ucmpq256_mask:
+ case X86::BI__builtin_ia32_ucmpq512_mask:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
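+ // The XOP vpcom builtins compare signed operands; the vpcomu forms
+ // compare unsigned operands.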
+ case X86::BI__builtin_ia32_vpcomb:
+ case X86::BI__builtin_ia32_vpcomw:
+ case X86::BI__builtin_ia32_vpcomd:
+ case X86::BI__builtin_ia32_vpcomq:
+ return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
+ case X86::BI__builtin_ia32_vpcomub:
+ case X86::BI__builtin_ia32_vpcomuw:
+ case X86::BI__builtin_ia32_vpcomud:
+ case X86::BI__builtin_ia32_vpcomuq:
+ return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
+ case X86::BI__builtin_ia32_kortestcqi:
+ case X86::BI__builtin_ia32_kortestchi:
+ case X86::BI__builtin_ia32_kortestcsi:
+ case X86::BI__builtin_ia32_kortestcdi: {
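+ // kortestc: OR the two masks and test whether the result is all ones.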
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ cir::IntType ty = cast<cir::IntType>(ops[0].getType());
+ mlir::Value allOnesOp =
+ builder.getConstAPInt(loc, ty, APInt::getAllOnes(ty.getWidth()));
+ mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
+ mlir::Value cmp =
+ cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp);
+ return builder.createCast(cir::CastKind::bool_to_int, cmp,
+ cgm.convertType(expr->getType()));
+ }
+ case X86::BI__builtin_ia32_kortestzqi:
+ case X86::BI__builtin_ia32_kortestzhi:
+ case X86::BI__builtin_ia32_kortestzsi:
+ case X86::BI__builtin_ia32_kortestzdi: {
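+ // kortestz: OR the two masks and test whether the result is all zeros.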
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ cir::IntType ty = cast<cir::IntType>(ops[0].getType());
+ mlir::Value allZerosOp = builder.getNullValue(ty, loc);
+ mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
+ mlir::Value cmp =
+ cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp);
+ return builder.createCast(cir::CastKind::bool_to_int, cmp,
+ cgm.convertType(expr->getType()));
+ }
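+ // The ktest builtins map directly to the AVX-512 ktestc/ktestz
+ // intrinsics.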
+ case X86::BI__builtin_ia32_ktestcqi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestc.b", ops);
+ case X86::BI__builtin_ia32_ktestzqi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestz.b", ops);
+ case X86::BI__builtin_ia32_ktestchi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestc.w", ops);
+ case X86::BI__builtin_ia32_ktestzhi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestz.w", ops);
+ case X86::BI__builtin_ia32_ktestcsi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestc.d", ops);
+ case X86::BI__builtin_ia32_ktestzsi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestz.d", ops);
+ case X86::BI__builtin_ia32_ktestcdi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestc.q", ops);
+ case X86::BI__builtin_ia32_ktestzdi:
+ return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.ktestz.q", ops);
+ case X86::BI__builtin_ia32_kaddqi:
+ return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kadd.b", ops);
+ case X86::BI__builtin_ia32_kaddhi:
+ return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kadd.w", ops);
+ case X86::BI__builtin_ia32_kaddsi:
+ return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kadd.d", ops);
+ case X86::BI__builtin_ia32_kadddi:
+ return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
+ "x86.avx512.kadd.q", ops);
+ case X86::BI__builtin_ia32_kandqi:
+ case X86::BI__builtin_ia32_kandhi:
+ case X86::BI__builtin_ia32_kandsi:
+ case X86::BI__builtin_ia32_kanddi:
+ return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+ cir::BinOpKind::And, ops);
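+ // kandn: the trailing 'true' inverts the first mask before the AND,
+ // giving ~a & b.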
+ case X86::BI__builtin_ia32_kandnqi:
+ case X86::BI__builtin_ia32_kandnhi:
+ case X86::BI__builtin_ia32_kandnsi:
+ case X86::BI__builtin_ia32_kandndi:
+ return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+ cir::BinOpKind::And, ops, true);
+ case X86::BI__builtin_ia32_korqi:
+ case X86::BI__builtin_ia32_korhi:
+ case X86::BI__builtin_ia32_korsi:
+ case X86::BI__builtin_ia32_kordi:
+ return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+ cir::BinOpKind::Or, ops);
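+ // kxnor: XOR with one operand inverted, which is equivalent to ~(a ^ b).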
+ case X86::BI__builtin_ia32_kxnorqi:
+ case X86::BI__builtin_ia32_kxnorhi:
+ case X86::BI__builtin_ia32_kxnorsi:
+ case X86::BI__builtin_ia32_kxnordi:
+ return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+ cir::BinOpKind::Xor, ops, true);
+ case X86::BI__builtin_ia32_kxorqi:
+ case X86::BI__builtin_ia32_kxorhi:
+ case X86::BI__builtin_ia32_kxorsi:
+ case X86::BI__builtin_ia32_kxordi:
+ return emitX86MaskLogic(builder, getLoc(expr->getExprLoc()),
+ cir::BinOpKind::Xor, ops);
+ case X86::BI__builtin_ia32_knotqi:
+ case X86::BI__builtin_ia32_knothi:
+ case X86::BI__builtin_ia32_knotsi:
+ case X86::BI__builtin_ia32_knotdi: {
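+ // Bitcast the mask to a vXi1 vector, negate every bit, then bitcast back
+ // to the integer type.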
+ cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
+ unsigned numElts = intTy.getWidth();
+ mlir::Value resVec =
+ getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
+ return builder.createBitcast(builder.createNot(resVec), ops[0].getType());
+ }
+ case X86::BI__builtin_ia32_kmovb:
+ case X86::BI__builtin_ia32_kmovw:
+ case X86::BI__builtin_ia32_kmovd:
+ case X86::BI__builtin_ia32_kmovq: {
+ // Bitcast to vXi1 type and then back to integer. This gets the mask
+ // register type into the IR, but might be optimized out depending on
+ // what's around it.
+ cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
+ unsigned numElts = intTy.getWidth();
+ mlir::Value resVec =
+ getMaskVecValue(builder, getLoc(expr->getExprLoc()), ops[0], numElts);
+ return builder.createBitcast(resVec, ops[0].getType());
+ }
+ case X86::BI__builtin_ia32_sqrtsh_round_mask:
+ case X86::BI__builtin_ia32_sqrtsd_round_mask:
+ case X86::BI__builtin_ia32_sqrtss_round_mask:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ case X86::BI__builtin_ia32_sqrtph512:
+ case X86::BI__builtin_ia32_sqrtps512:
+ case X86::BI__builtin_ia32_sqrtpd512: {
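+ // Lower the packed sqrt builtins directly to cir.sqrt on the vector
+ // operand. Note that the 512-bit forms also take a rounding-mode
+ // argument (ops[1]), which is not inspected here.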
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ mlir::Value arg = ops[0];
+ return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
+ }
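+ // pmuludq/pmuldq multiply the low 32 bits of each 64-bit element pair,
+ // unsigned and signed respectively.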
+ case X86::BI__builtin_ia32_pmuludq128:
+ case X86::BI__builtin_ia32_pmuludq256:
+ case X86::BI__builtin_ia32_pmuludq512: {
+ unsigned opTypePrimitiveSizeInBits =
+ cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
+ return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned=*/false,
+ ops, opTypePrimitiveSizeInBits);
+ }
+ case X86::BI__builtin_ia32_pmuldq128:
+ case X86::BI__builtin_ia32_pmuldq256:
+ case X86::BI__builtin_ia32_pmuldq512: {
+ unsigned opTypePrimitiveSizeInBits =
+ cgm.getDataLayout().getTypeSizeInBits(ops[0].getType());
+ return emitX86Muldq(builder, getLoc(expr->getExprLoc()), /*isSigned=*/true,
+ ops, opTypePrimitiveSizeInBits);
+ }
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask:
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz:
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ case X86::BI__builtin_ia32_vpshldd128:
+ case X86::BI__builtin_ia32_vpshldd256:
+ case X86::BI__builtin_ia32_vpshldd512:
+ case X86::BI__builtin_ia32_vpshldq128:
+ case X86::BI__builtin_ia32_vpshldq256:
+ case X86::BI__builtin_ia32_vpshldq512:
+ case X86::BI__builtin_ia32_vpshldw128:
+ case X86::BI__builtin_ia32_vpshldw256:
+ case X86::BI__builtin_ia32_vpshldw512:
+ case X86::BI__builtin_ia32_vpshrdd128:
+ case X86::BI__builtin_ia32_vpshrdd256:
+ case X86::BI__builtin_ia32_vpshrdd512:
+ case X86::BI__builtin_ia32_vpshrdq128:
+ case X86::BI__builtin_ia32_vpshrdq256:
+ case X86::BI__builtin_ia32_vpshrdq512:
+ case X86::BI__builtin_ia32_vpshrdw128:
+ case X86::BI__builtin_ia32_vpshrdw256:
+ case X86::BI__builtin_ia32_vpshrdw512:
+ case X86::BI__builtin_ia32_reduce_fadd_pd512:
+ case X86::BI__builtin_ia32_reduce_fadd_ps512:
+ case X86::BI__builtin_ia32_reduce_fadd_ph512:
+ case X86::BI__builtin_ia32_reduce_fadd_ph256:
+ case X86::BI__builtin_ia32_reduce_fadd_ph128:
+ case X86::BI__builtin_ia32_reduce_fmul_pd512:
+ case X86::BI__builtin_ia32_reduce_fmul_ps512:
+ case X86::BI__builtin_ia32_reduce_fmul_ph512:
+ case X86::BI__builtin_ia32_reduce_fmul_ph256:
+ case X86::BI__builtin_ia32_reduce_fmul_ph128:
+ case X86::BI__builtin_ia32_reduce_fmax_pd512:
+ case X86::BI__builtin_ia32_reduce_fmax_ps512:
+ case X86::BI__builtin_ia32_reduce_fmax_ph512:
+ case X86::BI__builtin_ia32_reduce_fmax_ph256:
+ case X86::BI__builtin_ia32_reduce_fmax_ph128:
+ case X86::BI__builtin_ia32_reduce_fmin_pd512:
+ case X86::BI__builtin_ia32_reduce_fmin_ps512:
+ case X86::BI__builtin_ia32_reduce_fmin_ph512:
+ case X86::BI__builtin_ia32_reduce_fmin_ph256:
+ case X86::BI__builtin_ia32_reduce_fmin_ph128:
+ case X86::BI__builtin_ia32_rdrand16_step:
+ case X86::BI__builtin_ia32_rdrand32_step:
+ case X86::BI__builtin_ia32_rdrand64_step:
+ case X86::BI__builtin_ia32_rdseed16_step:
+ case X86::BI__builtin_ia32_rdseed32_step:
+ case X86::BI__builtin_ia32_rdseed64_step:
+ case X86::BI__builtin_ia32_addcarryx_u32:
+ case X86::BI__builtin_ia32_addcarryx_u64:
+ case X86::BI__builtin_ia32_subborrow_u32:
+ case X86::BI__builtin_ia32_subborrow_u64:
+ case X86::BI__builtin_ia32_fpclassps128_mask:
+ case X86::BI__builtin_ia32_fpclassps256_mask:
+ case X86::BI__builtin_ia32_fpclassps512_mask:
+ case X86::BI__builtin_ia32_vfpclassbf16128_mask:
+ case X86::BI__builtin_ia32_vfpclassbf16256_mask:
+ case X86::BI__builtin_ia32_vfpclassbf16512_mask:
+ case X86::BI__builtin_ia32_fpclassph128_mask:
+ case X86::BI__builtin_ia32_fpclassph256_mask:
+ case X86::BI__builtin_ia32_fpclassph512_mask:
+ case X86::BI__builtin_ia32_fpclasspd128_mask:
+ case X86::BI__builtin_ia32_fpclasspd256_mask:
+ case X86::BI__builtin_ia32_fpclasspd512_mask:
+ case X86::BI__builtin_ia32_vp2intersect_q_512:
+ case X86::BI__builtin_ia32_vp2intersect_q_256:
+ case X86::BI__builtin_ia32_vp2intersect_q_128:
+ case X86::BI__builtin_ia32_vp2intersect_d_512:
+ case X86::BI__builtin_ia32_vp2intersect_d_256:
+ case X86::BI__builtin_ia32_vp2intersect_d_128:
+ case X86::BI__builtin_ia32_vpmultishiftqb128:
+ case X86::BI__builtin_ia32_vpmultishiftqb256:
+ case X86::BI__builtin_ia32_vpmultishiftqb512:
+ case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
+ case X86::BI__builtin_ia32_cmpeqps:
+ case X86::BI__builtin_ia32_cmpeqpd:
+ case X86::BI__builtin_ia32_cmpltps:
+ case X86::BI__builtin_ia32_cmpltpd:
+ case X86::BI__builtin_ia32_cmpleps:
+ case X86::BI__builtin_ia32_cmplepd:
+ case X86::BI__builtin_ia32_cmpunordps:
+ case X86::BI__builtin_ia32_cmpunordpd:
+ case X86::BI__builtin_ia32_cmpneqps:
+ case X86::BI__builtin_ia32_cmpneqpd:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
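+ // The NLT/NLE comparisons have NOT semantics: emit the ordinary LT/LE
+ // compare and invert the result.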
+ case X86::BI__builtin_ia32_cmpnltps:
+ case X86::BI__builtin_ia32_cmpnltpd:
+ return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
+ cir::CmpOpKind::lt, /*shouldInvert=*/true);
+ case X86::BI__builtin_ia32_cmpnleps:
+ case X86::BI__builtin_ia32_cmpnlepd:
+ return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
+ cir::CmpOpKind::le, /*shouldInvert=*/true);
+ case X86::BI__builtin_ia32_cmpordps:
+ case X86::BI__builtin_ia32_cmpordpd:
+ case X86::BI__builtin_ia32_cmpph128_mask:
+ case X86::BI__builtin_ia32_cmpph256_mask:
+ case X86::BI__builtin_ia32_cmpph512_mask:
+ case X86::BI__builtin_ia32_cmpps128_mask:
+ case X86::BI__builtin_ia32_cmpps256_mask:
+ case X86::BI__builtin_ia32_cmpps512_mask:
+ case X86::BI__builtin_ia32_cmppd128_mask:
+ case X86::BI__builtin_ia32_cmppd256_mask:
+ case X86::BI__builtin_ia32_cmppd512_mask:
+ case X86::BI__builtin_ia32_vcmpbf16512_mask:
+ case X86::BI__builtin_ia32_vcmpbf16256_mask:
+ case X86::BI__builtin_ia32_vcmpbf16128_mask:
+ case X86::BI__builtin_ia32_cmpps:
+ case X86::BI__builtin_ia32_cmpps256:
+ case X86::BI__builtin_ia32_cmppd:
+ case X86::BI__builtin_ia32_cmppd256:
+ case X86::BI__builtin_ia32_cmpeqss:
+ case X86::BI__builtin_ia32_cmpltss:
+ case X86::BI__builtin_ia32_cmpless:
+ case X86::BI__builtin_ia32_cmpunordss:
+ case X86::BI__builtin_ia32_cmpneqss:
+ case X86::BI__builtin_ia32_cmpnltss:
+ case X86::BI__builtin_ia32_cmpnless:
+ case X86::BI__builtin_ia32_cmpordss:
+ case X86::BI__builtin_ia32_cmpeqsd:
+ case X86::BI__builtin_ia32_cmpltsd:
+ case X86::BI__builtin_ia32_cmplesd:
+ case X86::BI__builtin_ia32_cmpunordsd:
+ case X86::BI__builtin_ia32_cmpneqsd:
+ case X86::BI__builtin_ia32_cmpnltsd:
+ case X86::BI__builtin_ia32_cmpnlesd:
+ case X86::BI__builtin_ia32_cmpordsd:
+ case X86::BI__builtin_ia32_vcvtph2ps_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps256_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+ case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
+ case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
+ case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
+ case X86::BI__cpuid:
+ case X86::BI__cpuidex:
+ case X86::BI__emul:
+ case X86::BI__emulu:
+ case X86::BI__mulh:
+ case X86::BI__umulh:
+ case X86::BI_mul128:
+ case X86::BI_umul128:
+ case X86::BI__faststorefence:
+ case X86::BI__shiftleft128:
+ case X86::BI__shiftright128:
+ case X86::BI_ReadWriteBarrier:
+ case X86::BI_ReadBarrier:
+ case X86::BI_WriteBarrier:
+ case X86::BI_AddressOfReturnAddress:
+ case X86::BI__stosb:
+ case X86::BI__ud2:
+ case X86::BI__int2c:
+ case X86::BI__readfsbyte:
+ case X86::BI__readfsword:
+ case X86::BI__readfsdword:
+ case X86::BI__readfsqword:
+ case X86::BI__readgsbyte:
+ case X86::BI__readgsword:
+ case X86::BI__readgsdword:
+ case X86::BI__readgsqword:
+ case X86::BI__builtin_ia32_encodekey128_u32:
+ case X86::BI__builtin_ia32_encodekey256_u32:
+ case X86::BI__builtin_ia32_aesenc128kl_u8:
+ case X86::BI__builtin_ia32_aesdec128kl_u8:
+ case X86::BI__builtin_ia32_aesenc256kl_u8:
+ case X86::BI__builtin_ia32_aesdec256kl_u8:
+ case X86::BI__builtin_ia32_aesencwide128kl_u8:
+ case X86::BI__builtin_ia32_aesdecwide128kl_u8:
+ case X86::BI__builtin_ia32_aesencwide256kl_u8:
+ case X86::BI__builtin_ia32_aesdecwide256kl_u8:
+ case X86::BI__builtin_ia32_vfcmaddcph512_mask:
+ case X86::BI__builtin_ia32_vfmaddcph512_mask:
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
+ case X86::BI__builtin_ia32_prefetchi:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ }
+}