[clang] [llvm] [CIR][X86] Implement lowering for sqrt builtins (PR #169310)
Priyanshu Kumar via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 20:12:51 PST 2025
https://github.com/Priyanshu3820 updated https://github.com/llvm/llvm-project/pull/169310
>From 627bcb3bde64a780ed2b9aaaa9267d97c9679f9c Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Wed, 26 Nov 2025 17:45:00 +0530
Subject: [PATCH 01/27] Add CIR sqrt builtin support for X86
---
clang/include/clang/CIR/Dialect/IR/CIROps.td | 344 +++++++++++++++++-
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 66 +++-
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 323 +++++++++++++++-
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 14 +
.../CIR/CodeGen/X86/cir-sqrtps-builtins.c | 46 +++
5 files changed, 772 insertions(+), 21 deletions(-)
create mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index e612d6a0ba886..291b035e6204c 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -802,8 +802,8 @@ def CIR_ConditionOp : CIR_Op<"condition", [
//===----------------------------------------------------------------------===//
defvar CIR_YieldableScopes = [
- "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "GlobalOp", "IfOp",
- "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp", "TryOp"
+ "ArrayCtor", "ArrayDtor", "AwaitOp", "CaseOp", "DoWhileOp", "ForOp",
+ "GlobalOp", "IfOp", "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp", "TryOp"
];
def CIR_YieldOp : CIR_Op<"yield", [
@@ -1640,6 +1640,82 @@ def CIR_CmpOp : CIR_Op<"cmp", [Pure, SameTypeOperands]> {
let isLLVMLoweringRecursive = true;
}
+//===----------------------------------------------------------------------===//
+// BinOpOverflowOp
+//===----------------------------------------------------------------------===//
+
+def CIR_BinOpOverflowKind : CIR_I32EnumAttr<
+ "BinOpOverflowKind", "checked binary arithmetic operation kind", [
+ I32EnumAttrCase<"Add", 0, "add">,
+ I32EnumAttrCase<"Sub", 1, "sub">,
+ I32EnumAttrCase<"Mul", 2, "mul">
+]>;
+
+def CIR_BinOpOverflowOp : CIR_Op<"binop.overflow", [Pure, SameTypeOperands]> {
+ let summary = "Perform binary integral arithmetic with overflow checking";
+ let description = [{
+ `cir.binop.overflow` performs binary arithmetic operations with overflow
+ checking on integral operands.
+
+ The `kind` argument specifies the kind of arithmetic operation to perform.
+ It can be either `add`, `sub`, or `mul`. The `lhs` and `rhs` arguments
+ specify the input operands of the arithmetic operation. The types of `lhs`
+ and `rhs` must be the same.
+
+ `cir.binop.overflow` produces two SSA values. `result` is the result of the
+ arithmetic operation truncated to its specified type. `overflow` is a
+ boolean value indicating whether overflow happens during the operation.
+
+ The exact semantics of this operation are as follows:
+
+ - `lhs` and `rhs` are promoted to an imaginary integral type that has
+ infinite precision.
+ - The arithmetic operation is performed on the promoted operands.
+ - The infinite-precision result is truncated to the type of `result`. The
+ truncated result is assigned to `result`.
+ - If the truncated result is equal to the un-truncated result, `overflow`
+ is assigned to false. Otherwise, `overflow` is assigned to true.
+ }];
+
+ let arguments = (ins
+ CIR_BinOpOverflowKind:$kind,
+ CIR_IntType:$lhs,
+ CIR_IntType:$rhs
+ );
+
+ let results = (outs CIR_IntType:$result, CIR_BoolType:$overflow);
+
+ let assemblyFormat = [{
+ `(` $kind `,` $lhs `,` $rhs `)` `:` qualified(type($lhs)) `,`
+ `(` qualified(type($result)) `,` qualified(type($overflow)) `)`
+ attr-dict
+ }];
+
+ let builders = [
+ OpBuilder<(ins "cir::IntType":$resultTy,
+ "cir::BinOpOverflowKind":$kind,
+ "mlir::Value":$lhs,
+ "mlir::Value":$rhs), [{
+ auto overflowTy = cir::BoolType::get($_builder.getContext());
+ build($_builder, $_state, resultTy, overflowTy, kind, lhs, rhs);
+ }]>
+ ];
+
+ let extraLLVMLoweringPatternDecl = [{
+ static std::string getLLVMIntrinName(cir::BinOpOverflowKind opKind,
+ bool isSigned, unsigned width);
+
+ struct EncompassedTypeInfo {
+ bool sign;
+ unsigned width;
+ };
+
+ static EncompassedTypeInfo computeEncompassedTypeWidth(cir::IntType operandTy,
+ cir::IntType resultTy);
+ }];
+}
+
+
//===----------------------------------------------------------------------===//
// BinOp
//===----------------------------------------------------------------------===//
@@ -2533,7 +2609,9 @@ def CIR_FuncOp : CIR_Op<"func", [
OptionalAttr<DictArrayAttr>:$res_attrs,
OptionalAttr<FlatSymbolRefAttr>:$aliasee,
CIR_OptionalPriorityAttr:$global_ctor_priority,
- CIR_OptionalPriorityAttr:$global_dtor_priority);
+ CIR_OptionalPriorityAttr:$global_dtor_priority,
+ OptionalAttr<CIR_CXXSpecialMemberAttr>:$cxx_special_member
+ );
let regions = (region AnyRegion:$body);
@@ -2572,7 +2650,32 @@ def CIR_FuncOp : CIR_Op<"func", [
//===------------------------------------------------------------------===//
bool isDeclaration();
- }];
+
+ //===------------------------------------------------------------------===//
+ // C++ Special Member Functions
+ //===------------------------------------------------------------------===//
+
+ /// Returns true if this function is a C++ special member function.
+ bool isCXXSpecialMemberFunction();
+
+ bool isCxxConstructor();
+ bool isCxxDestructor();
+
+ /// Returns true if this function is a copy or move assignment operator.
+ bool isCxxSpecialAssignment();
+
+ /// Returns the kind of constructor this function represents, if any.
+ std::optional<CtorKind> getCxxConstructorKind();
+
+ /// Returns the kind of assignment operator (move, copy) this function
+ /// represents, if any.
+ std::optional<AssignKind> getCxxSpecialAssignKind();
+
+ /// Returns true if the function is a trivial C++ member function such as
+ /// trivial default constructor, copy/move constructor, copy/move assignment,
+ /// or destructor.
+ bool isCxxTrivialMemberFunction();
+}];
let hasCustomAssemblyFormat = 1;
let hasVerifier = 1;
@@ -2752,6 +2855,100 @@ def CIR_CallOp : CIR_CallOpBase<"call", [NoRegionArguments]> {
];
}
+//===----------------------------------------------------------------------===//
+// AwaitOp
+//===----------------------------------------------------------------------===//
+
+def CIR_AwaitKind : CIR_I32EnumAttr<"AwaitKind", "await kind", [
+ I32EnumAttrCase<"Init", 0, "init">,
+ I32EnumAttrCase<"User", 1, "user">,
+ I32EnumAttrCase<"Yield", 2, "yield">,
+ I32EnumAttrCase<"Final", 3, "final">
+]>;
+
+def CIR_AwaitOp : CIR_Op<"await",[
+ DeclareOpInterfaceMethods<RegionBranchOpInterface>,
+ RecursivelySpeculatable, NoRegionArguments
+]> {
+ let summary = "Wraps C++ co_await implicit logic";
+ let description = [{
+ Under the hood, using C++ `co_await expr` roughly
+ translates to:
+
+ ```c++
+ // co_await expr;
+
+ auto &&x = CommonExpr();
+ if (!x.await_ready()) {
+ ...
+ x.await_suspend(...);
+ ...
+ }
+ x.await_resume();
+ ```
+
+ `cir.await` represents this logic by using 3 regions:
+ - ready: covers veto power from x.await_ready()
+ - suspend: wraps actual x.await_suspend() logic
+ - resume: handles x.await_resume()
+
+ Breaking this up in regions allows individual scrutiny of conditions
+ which might lead to folding some of them out. Lowerings coming out
+ of CIR, e.g. LLVM, should use the `suspend` region to track more
+ lower level codegen (e.g. intrinsic emission for coro.save/coro.suspend).
+
+ There are also 4 flavors of `cir.await` available:
+ - `init`: compiler generated initial suspend via implicit `co_await`.
+ - `user`: also known as normal, representing a user written `co_await`.
+ - `yield`: user written `co_yield` expressions.
+ - `final`: compiler generated final suspend via implicit `co_await`.
+
+ ```mlir
+ cir.scope {
+ ... // auto &&x = CommonExpr();
+ cir.await(user, ready : {
+ ... // x.await_ready()
+ }, suspend : {
+ ... // x.await_suspend()
+ }, resume : {
+ ... // x.await_resume()
+ })
+ }
+ ```
+
+ Note that resolution of the common expression is assumed to happen
+ as part of the enclosing await scope.
+ }];
+
+ let arguments = (ins CIR_AwaitKind:$kind);
+ let regions = (region SizedRegion<1>:$ready,
+ SizedRegion<1>:$suspend,
+ SizedRegion<1>:$resume);
+ let assemblyFormat = [{
+ `(` $kind `,`
+ `ready` `:` $ready `,`
+ `suspend` `:` $suspend `,`
+ `resume` `:` $resume `,`
+ `)`
+ attr-dict
+ }];
+
+ let skipDefaultBuilders = 1;
+ let builders = [
+ OpBuilder<(ins
+ "cir::AwaitKind":$kind,
+ CArg<"BuilderCallbackRef",
+ "nullptr">:$readyBuilder,
+ CArg<"BuilderCallbackRef",
+ "nullptr">:$suspendBuilder,
+ CArg<"BuilderCallbackRef",
+ "nullptr">:$resumeBuilder
+ )>
+ ];
+
+ let hasVerifier = 1;
+}
+
//===----------------------------------------------------------------------===//
// CopyOp
//===----------------------------------------------------------------------===//
@@ -2988,6 +3185,39 @@ def CIR_InlineAsmOp : CIR_Op<"asm", [RecursiveMemoryEffects]> {
let hasCustomAssemblyFormat = 1;
}
+//===----------------------------------------------------------------------===//
+// SqrtOp
+//===----------------------------------------------------------------------===//
+
+def CIR_SqrtOp : CIR_Op<"sqrt", [Pure]> {
+ let summary = "Floating-point square root";
+
+ let description = [{
+ The `cir.sqrt` operation computes the element-wise square root of its input.
+
+ The input must be either:
+ • a floating-point scalar type, or
+ • a vector whose element type is floating-point.
+
+ The result type must match the input type exactly.
+
+ Examples:
+ // scalar
+ %r = cir.sqrt %x : !cir.fp64
+
+ // vector
+ %v = cir.sqrt %vec : !cir.vector<!cir.fp32 x 4>
+ }];
+
+ // input and output types: float or vector-of-float
+ let arguments = (ins CIR_AnyFloatOrVecOfFloatType:$input);
+ let results = (outs CIR_AnyFloatOrVecOfFloatType:$result);
+
+ let assemblyFormat = [{
+ $input `:` type($input) attr-dict
+ }];
+}
+
//===----------------------------------------------------------------------===//
// UnreachableOp
//===----------------------------------------------------------------------===//
@@ -4018,6 +4248,72 @@ def CIR_RotateOp : CIR_Op<"rotate", [Pure, SameOperandsAndResultType]> {
let hasFolder = 1;
}
+//===----------------------------------------------------------------------===//
+// FPClass Test Flags
+//===----------------------------------------------------------------------===//
+
+def FPClassTestEnum : CIR_I32EnumAttr<"FPClassTest", "floating-point class test flags", [
+ // Basic flags
+ I32EnumAttrCase<"SignalingNaN", 1, "fcSNan">,
+ I32EnumAttrCase<"QuietNaN", 2, "fcQNan">,
+ I32EnumAttrCase<"NegativeInfinity", 4, "fcNegInf">,
+ I32EnumAttrCase<"NegativeNormal", 8, "fcNegNormal">,
+ I32EnumAttrCase<"NegativeSubnormal", 16, "fcNegSubnormal">,
+ I32EnumAttrCase<"NegativeZero", 32, "fcNegZero">,
+ I32EnumAttrCase<"PositiveZero", 64, "fcPosZero">,
+ I32EnumAttrCase<"PositiveSubnormal", 128, "fcPosSubnormal">,
+ I32EnumAttrCase<"PositiveNormal", 256, "fcPosNormal">,
+ I32EnumAttrCase<"PositiveInfinity", 512, "fcPosInf">,
+
+ // Composite flags
+ I32EnumAttrCase<"Nan", 3, "fcNan">, // fcSNan | fcQNan
+ I32EnumAttrCase<"Infinity", 516, "fcInf">, // fcPosInf | fcNegInf
+ I32EnumAttrCase<"Normal", 264, "fcNormal">, // fcPosNormal | fcNegNormal
+ I32EnumAttrCase<"Subnormal", 144, "fcSubnormal">, // fcPosSubnormal | fcNegSubnormal
+ I32EnumAttrCase<"Zero", 96, "fcZero">, // fcPosZero | fcNegZero
+ I32EnumAttrCase<"PositiveFinite", 448, "fcPosFinite">,// fcPosNormal | fcPosSubnormal | fcPosZero
+ I32EnumAttrCase<"NegativeFinite", 56, "fcNegFinite">, // fcNegNormal | fcNegSubnormal | fcNegZero
+ I32EnumAttrCase<"Finite", 504, "fcFinite">, // fcPosFinite | fcNegFinite
+ I32EnumAttrCase<"Positive", 960, "fcPositive">, // fcPosFinite | fcPosInf
+ I32EnumAttrCase<"Negative", 60, "fcNegative">, // fcNegFinite | fcNegInf
+ I32EnumAttrCase<"All", 1023, "fcAllFlags">, // fcNan | fcInf | fcFinite
+]> {
+ let cppNamespace = "::cir";
+}
+
+def CIR_IsFPClassOp : CIR_Op<"is_fp_class"> {
+ let summary = "Corresponding to the `__builtin_isfpclass` builtin function in clang";
+
+ let description = [{
+ The `cir.is_fp_class` operation takes a floating-point value as its first
+ argument and a bitfield of flags as its second argument. The operation
+ returns a boolean value indicating whether the floating-point value
+ satisfies the given flags.
+
+ The flags must be a compile time constant and the values are:
+
+ | Bit # | floating-point class |
+ | ----- | -------------------- |
+ | 0 | Signaling NaN |
+ | 1 | Quiet NaN |
+ | 2 | Negative infinity |
+ | 3 | Negative normal |
+ | 4 | Negative subnormal |
+ | 5 | Negative zero |
+ | 6 | Positive zero |
+ | 7 | Positive subnormal |
+ | 8 | Positive normal |
+ | 9 | Positive infinity |
+ }];
+
+ let arguments = (ins CIR_AnyFloatType:$src,
+ FPClassTestEnum:$flags);
+ let results = (outs CIR_BoolType:$result);
+ let assemblyFormat = [{
+ $src `,` $flags `:` functional-type($src, $result) attr-dict
+ }];
+}
+
//===----------------------------------------------------------------------===//
// Assume Operations
//===----------------------------------------------------------------------===//
@@ -4202,7 +4498,7 @@ def CIR_ObjSizeOp : CIR_Op<"objsize", [Pure]> {
When the `min` attribute is present, the operation returns the minimum
guaranteed accessible size. When absent (max mode), it returns the maximum
possible object size. Corresponds to `llvm.objectsize`'s `min` argument.
-
+
The `dynamic` attribute determines if the value should be evaluated at
runtime. Corresponds to `llvm.objectsize`'s `dynamic` argument.
@@ -4658,6 +4954,44 @@ def CIR_TryOp : CIR_Op<"try",[
let hasLLVMLowering = false;
}
+//===----------------------------------------------------------------------===//
+// Exception related: EhInflightOp
+//===----------------------------------------------------------------------===//
+
+def CIR_EhInflightOp : CIR_Op<"eh.inflight_exception"> {
+ let summary = "Materialize the catch clause formal parameter";
+ let description = [{
+ `cir.eh.inflight_exception` returns two values:
+ - `exception_ptr`: The exception pointer for the inflight exception
+ - `type_id`: the type info index for the exception type
+ This operation is expected to be the first operation in the unwind
+ destination basic blocks of a `cir.try_call` operation.
+
+ The `cleanup` attribute indicates that clean up code must be run before the
+ values produced by this operation are used to dispatch the exception. This
+ cleanup code must be executed even if the exception is not caught.
+ This helps CIR to pass down more accurate information for LLVM lowering
+ to landingpads.
+
+ Example:
+
+ ```mlir
+ %exception_ptr, %type_id = cir.eh.inflight_exception
+ %exception_ptr, %type_id = cir.eh.inflight_exception [@_ZTIi, @_ZTIPKc]
+ %exception_ptr, %type_id = cir.eh.inflight_exception cleanup
+ ```
+ }];
+
+ let arguments = (ins UnitAttr:$cleanup,
+ OptionalAttr<FlatSymbolRefArrayAttr>:$catch_type_list);
+ let results = (outs CIR_VoidPtrType:$exception_ptr, CIR_UInt32:$type_id);
+ let assemblyFormat = [{
+ (`cleanup` $cleanup^)?
+ ($catch_type_list^)?
+ attr-dict
+ }];
+}
+
//===----------------------------------------------------------------------===//
// Atomic operations
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index ee6900141647f..e91a9e4db229a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -121,20 +121,36 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
return emitIntrinsicCallOp(*this, expr, "x86.sse.sfence", voidTy);
case X86::BI_mm_prefetch:
case X86::BI__rdtsc:
- case X86::BI__builtin_ia32_rdtscp:
+ case X86::BI__builtin_ia32_rdtscp: {
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
+ }
case X86::BI__builtin_ia32_lzcnt_u16:
case X86::BI__builtin_ia32_lzcnt_u32:
- case X86::BI__builtin_ia32_lzcnt_u64:
+ case X86::BI__builtin_ia32_lzcnt_u64: {
+ mlir::Value isZeroPoison = builder.getFalse(getLoc(expr->getExprLoc()));
+ return emitIntrinsicCallOp(*this, expr, "ctlz", ops[0].getType(),
+ mlir::ValueRange{ops[0], isZeroPoison});
+ }
case X86::BI__builtin_ia32_tzcnt_u16:
case X86::BI__builtin_ia32_tzcnt_u32:
- case X86::BI__builtin_ia32_tzcnt_u64:
+ case X86::BI__builtin_ia32_tzcnt_u64: {
+ mlir::Value isZeroPoison = builder.getFalse(getLoc(expr->getExprLoc()));
+ return emitIntrinsicCallOp(*this, expr, "cttz", ops[0].getType(),
+ mlir::ValueRange{ops[0], isZeroPoison});
+ }
case X86::BI__builtin_ia32_undef128:
case X86::BI__builtin_ia32_undef256:
case X86::BI__builtin_ia32_undef512:
- cgm.errorNYI(expr->getSourceRange(),
- std::string("unimplemented X86 builtin call: ") +
- getContext().BuiltinInfo.getName(builtinID));
- return {};
+ // The x86 definition of "undef" is not the same as the LLVM definition
+ // (PR32176). We leave optimizing away an unnecessary zero constant to the
+ // IR optimizer and backend.
+ // TODO: If we had a "freeze" IR instruction to generate a fixed undef
+ // value, we should use that here instead of a zero.
+ return builder.getNullValue(convertType(expr->getType()),
+ getLoc(expr->getExprLoc()));
case X86::BI__builtin_ia32_vec_ext_v4hi:
case X86::BI__builtin_ia32_vec_ext_v16qi:
case X86::BI__builtin_ia32_vec_ext_v8hi:
@@ -169,10 +185,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_vec_set_v16hi:
case X86::BI__builtin_ia32_vec_set_v8si:
case X86::BI__builtin_ia32_vec_set_v4di:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
case X86::BI_mm_setcsr:
- case X86::BI__builtin_ia32_ldmxcsr:
+ case X86::BI__builtin_ia32_ldmxcsr: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ Address tmp = createMemTemp(expr->getArg(0)->getType(), loc);
+ builder.createStore(loc, ops[0], tmp);
+ return emitIntrinsicCallOp(*this, expr, "x86.sse.ldmxcsr",
+ builder.getVoidTy(), tmp.getPointer());
+ }
case X86::BI_mm_getcsr:
- case X86::BI__builtin_ia32_stmxcsr:
+ case X86::BI__builtin_ia32_stmxcsr: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ Address tmp = createMemTemp(expr->getType(), loc);
+ emitIntrinsicCallOp(*this, expr, "x86.sse.stmxcsr", builder.getVoidTy(),
+ tmp.getPointer());
+ return builder.createLoad(loc, tmp);
+ }
case X86::BI__builtin_ia32_xsave:
case X86::BI__builtin_ia32_xsave64:
case X86::BI__builtin_ia32_xrstor:
@@ -681,10 +713,24 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtsh_round_mask:
case X86::BI__builtin_ia32_sqrtsd_round_mask:
case X86::BI__builtin_ia32_sqrtss_round_mask:
+ errorNYI("masked round sqrt builtins");
+ return {};
case X86::BI__builtin_ia32_sqrtpd256:
case X86::BI__builtin_ia32_sqrtpd:
case X86::BI__builtin_ia32_sqrtps256:
- case X86::BI__builtin_ia32_sqrtps:
+ case X86::BI__builtin_ia32_sqrtps: {
+ mlir::Location loc = getLoc(E->getExprLoc());
+ assert(E->getNumArgs() == 1 && "__builtin_ia32_sqrtps takes one argument");
+ mlir::Value arg = emitScalarExpr(E->getArg(0));
+ mlir::Type argTy = arg.getType();
+ if (auto vecTy = argTy.dyn_cast<mlir::VectorType>()) {
+ assert(vecTy.getNumElements() == 4 &&
+ vecTy.getElementType().isa<mlir::FloatType>() &&
+ "__builtin_ia32_sqrtps expects <4 x float> / __m128");
+ }
+ auto sqrt = cir::SqrtOp > ::create(builder, loc, argTy, arg);
+ return sqrt.getResult();
+ }
case X86::BI__builtin_ia32_sqrtph256:
case X86::BI__builtin_ia32_sqrtph:
case X86::BI__builtin_ia32_sqrtph512:
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index d43a462a25092..937c66082ca40 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -30,6 +30,7 @@
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "mlir/Transforms/DialectConversion.h"
+#include "clang/Basic/LLVM.h"
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
@@ -44,6 +45,96 @@
using namespace cir;
using namespace llvm;
+using namespace mlir;
+
+static std::string getLLVMIntrinsicNameForType(Type llvmTy) {
+ std::string s;
+ {
+ llvm::raw_string_ostream os(s);
+ llvm::Type *unused = nullptr;
+ os << llvmTy;
+ }
+ if (auto vecTy = llvmTy.dyn_cast<LLVM::LLVMType>()) {
+ }
+ return s;
+}
+
+// Lower cir.sqrt to a call to the matching llvm.sqrt.* intrinsic.
+LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
+ cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
+ ConversionPatternRewriter &rewriter) const {
+
+ Location loc = op.getLoc();
+ MLIRContext *ctx = rewriter.getContext();
+
+ Type cirResTy = op.getResult().getType();
+ Type llvmResTy = getTypeConverter()->convertType(cirResTy);
+ if (!llvmResTy)
+ return op.emitOpError(
+ "expected LLVM dialect result type for cir.sqrt lowering");
+
+ Value operand = adaptor.getInput();
+ Value llvmOperand = operand;
+ if (operand.getType() != llvmResTy) {
+ llvmOperand = rewriter.create<LLVM::BitcastOp>(loc, llvmResTy, operand);
+ }
+
+ // Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result
+ std::string intrinsicName = "llvm.sqrt.";
+ std::string suffix;
+
+ // If the CIR result type is a vector, include the 'vN' part in the suffix.
+ if (auto vec = cirResTy.dyn_cast<cir::VectorType>()) {
+ Type elt = vec.getElementType();
+ if (auto f = elt.dyn_cast<cir::FloatType>()) {
+ unsigned width = f.getWidth();
+ unsigned n = vec.getNumElements();
+ if (width == 32)
+ suffix = "v" + std::to_string(n) + "f32";
+ else if (width == 64)
+ suffix = "v" + std::to_string(n) + "f64";
+ else if (width == 16)
+ suffix = "v" + std::to_string(n) + "f16";
+ else
+ return op.emitOpError("unsupported float width for sqrt");
+ } else {
+ return op.emitOpError("vector element must be floating point for sqrt");
+ }
+ } else if (auto f = cirResTy.dyn_cast<cir::FloatType>()) {
+ // Scalar float
+ unsigned width = f.getWidth();
+ if (width == 32)
+ suffix = "f32";
+ else if (width == 64)
+ suffix = "f64";
+ else if (width == 16)
+ suffix = "f16";
+ else
+ return op.emitOpError("unsupported float width for sqrt");
+ } else {
+ return op.emitOpError("unsupported type for cir.sqrt lowering");
+ }
+
+ intrinsicName += suffix;
+
+ // Ensure the llvm intrinsic function exists at module scope. Insert it at
+ // the start of the module body using an insertion guard.
+ ModuleOp module = op->getParentOfType<ModuleOp>();
+ if (!module.lookupSymbol<LLVM::LLVMFuncOp>(intrinsicName)) {
+ OpBuilder::InsertionGuard guard(rewriter);
+ rewriter.setInsertionPointToStart(module.getBody());
+ auto llvmFnType = LLVM::LLVMType::getFunctionTy(llvmResTy, {llvmResTy},
+ /*isVarArg=*/false);
+ rewriter.create<LLVM::LLVMFuncOp>(loc, intrinsicName, llvmFnType);
+ }
+
+ // Create the call and replace cir.sqrt
+ auto callee = SymbolRefAttr::get(ctx, intrinsicName);
+ rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, llvmResTy, callee,
+ ArrayRef<Value>{llvmOperand});
+
+ return mlir::success();
+}
namespace cir {
namespace direct {
@@ -284,7 +375,10 @@ void convertSideEffectForCall(mlir::Operation *callOp, bool isNothrow,
memoryEffect = mlir::LLVM::MemoryEffectsAttr::get(
callOp->getContext(), /*other=*/ModRefInfo::Ref,
/*argMem=*/ModRefInfo::Ref,
- /*inaccessibleMem=*/ModRefInfo::Ref);
+ /*inaccessibleMem=*/ModRefInfo::Ref,
+ /*errnoMem=*/ModRefInfo::Ref,
+ /*targetMem0=*/ModRefInfo::Ref,
+ /*targetMem1=*/ModRefInfo::Ref);
noUnwind = true;
willReturn = true;
break;
@@ -293,7 +387,10 @@ void convertSideEffectForCall(mlir::Operation *callOp, bool isNothrow,
memoryEffect = mlir::LLVM::MemoryEffectsAttr::get(
callOp->getContext(), /*other=*/ModRefInfo::NoModRef,
/*argMem=*/ModRefInfo::NoModRef,
- /*inaccessibleMem=*/ModRefInfo::NoModRef);
+ /*inaccessibleMem=*/ModRefInfo::NoModRef,
+ /*errnoMem=*/ModRefInfo::NoModRef,
+ /*targetMem0=*/ModRefInfo::NoModRef,
+ /*targetMem1=*/ModRefInfo::NoModRef);
noUnwind = true;
willReturn = true;
break;
@@ -670,6 +767,18 @@ mlir::LogicalResult CIRToLLVMASinOpLowering::matchAndRewrite(
return mlir::success();
}
+mlir::LogicalResult CIRToLLVMIsFPClassOpLowering::matchAndRewrite(
+ cir::IsFPClassOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ mlir::Value src = adaptor.getSrc();
+ cir::FPClassTest flags = adaptor.getFlags();
+ mlir::IntegerType retTy = rewriter.getI1Type();
+
+ rewriter.replaceOpWithNewOp<mlir::LLVM::IsFPClass>(
+ op, retTy, src, static_cast<uint32_t>(flags));
+ return mlir::success();
+}
+
mlir::LogicalResult CIRToLLVMAssumeOpLowering::matchAndRewrite(
cir::AssumeOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
@@ -1995,7 +2104,6 @@ void CIRToLLVMGlobalOpLowering::setupRegionInitializedLLVMGlobalOp(
// attributes are available on cir.global ops. This duplicates code
// in CIRToLLVMGlobalOpLowering::matchAndRewrite() but that will go
// away when the placeholders are no longer needed.
- assert(!cir::MissingFeatures::opGlobalConstant());
const bool isConst = op.getConstant();
assert(!cir::MissingFeatures::addressSpace());
const unsigned addrSpace = 0;
@@ -2055,8 +2163,7 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite(
convertTypeForMemory(*getTypeConverter(), dataLayout, cirSymType);
// FIXME: These default values are placeholders until the equivalent
// attributes are available on cir.global ops.
- assert(!cir::MissingFeatures::opGlobalConstant());
- const bool isConst = false;
+ const bool isConst = op.getConstant();
assert(!cir::MissingFeatures::addressSpace());
const unsigned addrSpace = 0;
const bool isDsoLocal = op.getDsoLocal();
@@ -2570,6 +2677,120 @@ mlir::LogicalResult CIRToLLVMCmpOpLowering::matchAndRewrite(
return cmpOp.emitError() << "unsupported type for CmpOp: " << type;
}
+mlir::LogicalResult CIRToLLVMBinOpOverflowOpLowering::matchAndRewrite(
+ cir::BinOpOverflowOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ mlir::Location loc = op.getLoc();
+ cir::BinOpOverflowKind arithKind = op.getKind();
+ cir::IntType operandTy = op.getLhs().getType();
+ cir::IntType resultTy = op.getResult().getType();
+
+ EncompassedTypeInfo encompassedTyInfo =
+ computeEncompassedTypeWidth(operandTy, resultTy);
+ mlir::IntegerType encompassedLLVMTy =
+ rewriter.getIntegerType(encompassedTyInfo.width);
+
+ mlir::Value lhs = adaptor.getLhs();
+ mlir::Value rhs = adaptor.getRhs();
+ if (operandTy.getWidth() < encompassedTyInfo.width) {
+ if (operandTy.isSigned()) {
+ lhs = mlir::LLVM::SExtOp::create(rewriter, loc, encompassedLLVMTy, lhs);
+ rhs = mlir::LLVM::SExtOp::create(rewriter, loc, encompassedLLVMTy, rhs);
+ } else {
+ lhs = mlir::LLVM::ZExtOp::create(rewriter, loc, encompassedLLVMTy, lhs);
+ rhs = mlir::LLVM::ZExtOp::create(rewriter, loc, encompassedLLVMTy, rhs);
+ }
+ }
+
+ std::string intrinName = getLLVMIntrinName(arithKind, encompassedTyInfo.sign,
+ encompassedTyInfo.width);
+ auto intrinNameAttr = mlir::StringAttr::get(op.getContext(), intrinName);
+
+ mlir::IntegerType overflowLLVMTy = rewriter.getI1Type();
+ auto intrinRetTy = mlir::LLVM::LLVMStructType::getLiteral(
+ rewriter.getContext(), {encompassedLLVMTy, overflowLLVMTy});
+
+ auto callLLVMIntrinOp = mlir::LLVM::CallIntrinsicOp::create(
+ rewriter, loc, intrinRetTy, intrinNameAttr, mlir::ValueRange{lhs, rhs});
+ mlir::Value intrinRet = callLLVMIntrinOp.getResult(0);
+
+ mlir::Value result = mlir::LLVM::ExtractValueOp::create(
+ rewriter, loc, intrinRet, ArrayRef<int64_t>{0})
+ .getResult();
+ mlir::Value overflow = mlir::LLVM::ExtractValueOp::create(
+ rewriter, loc, intrinRet, ArrayRef<int64_t>{1})
+ .getResult();
+
+ if (resultTy.getWidth() < encompassedTyInfo.width) {
+ mlir::Type resultLLVMTy = getTypeConverter()->convertType(resultTy);
+ auto truncResult =
+ mlir::LLVM::TruncOp::create(rewriter, loc, resultLLVMTy, result);
+
+ // Extend the truncated result back to the encompassing type to check for
+ // any overflows during the truncation.
+ mlir::Value truncResultExt;
+ if (resultTy.isSigned())
+ truncResultExt = mlir::LLVM::SExtOp::create(
+ rewriter, loc, encompassedLLVMTy, truncResult);
+ else
+ truncResultExt = mlir::LLVM::ZExtOp::create(
+ rewriter, loc, encompassedLLVMTy, truncResult);
+ auto truncOverflow = mlir::LLVM::ICmpOp::create(
+ rewriter, loc, mlir::LLVM::ICmpPredicate::ne, truncResultExt, result);
+
+ result = truncResult;
+ overflow = mlir::LLVM::OrOp::create(rewriter, loc, overflow, truncOverflow);
+ }
+
+ mlir::Type boolLLVMTy =
+ getTypeConverter()->convertType(op.getOverflow().getType());
+ if (boolLLVMTy != rewriter.getI1Type())
+ overflow = mlir::LLVM::ZExtOp::create(rewriter, loc, boolLLVMTy, overflow);
+
+ rewriter.replaceOp(op, mlir::ValueRange{result, overflow});
+
+ return mlir::success();
+}
+
+std::string CIRToLLVMBinOpOverflowOpLowering::getLLVMIntrinName(
+ cir::BinOpOverflowKind opKind, bool isSigned, unsigned width) {
+ // The intrinsic name is `@llvm.{s|u}{opKind}.with.overflow.i{width}`
+
+ std::string name = "llvm.";
+
+ if (isSigned)
+ name.push_back('s');
+ else
+ name.push_back('u');
+
+ switch (opKind) {
+ case cir::BinOpOverflowKind::Add:
+ name.append("add.");
+ break;
+ case cir::BinOpOverflowKind::Sub:
+ name.append("sub.");
+ break;
+ case cir::BinOpOverflowKind::Mul:
+ name.append("mul.");
+ break;
+ }
+
+ name.append("with.overflow.i");
+ name.append(std::to_string(width));
+
+ return name;
+}
+
+CIRToLLVMBinOpOverflowOpLowering::EncompassedTypeInfo
+CIRToLLVMBinOpOverflowOpLowering::computeEncompassedTypeWidth(
+ cir::IntType operandTy, cir::IntType resultTy) {
+ bool sign = operandTy.getIsSigned() || resultTy.getIsSigned();
+ unsigned width =
+ std::max(operandTy.getWidth() + (sign && operandTy.isUnsigned()),
+ resultTy.getWidth() + (sign && resultTy.isUnsigned()));
+ return {sign, width};
+}
+
mlir::LogicalResult CIRToLLVMShiftOpLowering::matchAndRewrite(
cir::ShiftOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
@@ -3100,6 +3321,90 @@ mlir::LogicalResult CIRToLLVMAllocExceptionOpLowering::matchAndRewrite(
return mlir::success();
}
+static mlir::LLVM::LLVMStructType
+getLLVMLandingPadStructTy(mlir::ConversionPatternRewriter &rewriter) {
+ // Create the landing pad type: struct { ptr, i32 }
+ mlir::MLIRContext *ctx = rewriter.getContext();
+ auto llvmPtr = mlir::LLVM::LLVMPointerType::get(ctx);
+ llvm::SmallVector<mlir::Type> structFields = {llvmPtr, rewriter.getI32Type()};
+ return mlir::LLVM::LLVMStructType::getLiteral(ctx, structFields);
+}
+
+mlir::LogicalResult CIRToLLVMEhInflightOpLowering::matchAndRewrite(
+ cir::EhInflightOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ auto llvmFn = op->getParentOfType<mlir::LLVM::LLVMFuncOp>();
+ assert(llvmFn && "expected LLVM function parent");
+ mlir::Block *entryBlock = &llvmFn.getRegion().front();
+ assert(entryBlock->isEntryBlock());
+
+ mlir::ArrayAttr catchListAttr = op.getCatchTypeListAttr();
+ mlir::SmallVector<mlir::Value> catchSymAddrs;
+
+ auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext());
+ mlir::Location loc = op.getLoc();
+
+ // %landingpad = landingpad { ptr, i32 }
+ // Note that since llvm.landingpad has to be the first operation on the
+ // block, any needed value for its operands has to be added somewhere else.
+ if (catchListAttr) {
+ // catch ptr @_ZTIi
+ // catch ptr @_ZTIPKc
+ for (mlir::Attribute catchAttr : catchListAttr) {
+ auto symAttr = cast<mlir::FlatSymbolRefAttr>(catchAttr);
+ // Generate `llvm.mlir.addressof` for each symbol, and place those
+ // operations in the LLVM function entry basic block.
+ mlir::OpBuilder::InsertionGuard guard(rewriter);
+ rewriter.setInsertionPointToStart(entryBlock);
+ mlir::Value addrOp = mlir::LLVM::AddressOfOp::create(
+ rewriter, loc, llvmPtrTy, symAttr.getValue());
+ catchSymAddrs.push_back(addrOp);
+ }
+ } else if (!op.getCleanup()) {
+ // We need to emit catch-all only if cleanup is not set, because when we
+ // have a catch-all handler, there is no case in which we would unwind past
+ // the handler.
+ mlir::OpBuilder::InsertionGuard guard(rewriter);
+ rewriter.setInsertionPointToStart(entryBlock);
+ mlir::Value nullOp = mlir::LLVM::ZeroOp::create(rewriter, loc, llvmPtrTy);
+ catchSymAddrs.push_back(nullOp);
+ }
+
+ // %slot = extractvalue { ptr, i32 } %x, 0
+ // %selector = extractvalue { ptr, i32 } %x, 1
+ mlir::LLVM::LLVMStructType llvmLandingPadStructTy =
+ getLLVMLandingPadStructTy(rewriter);
+ auto landingPadOp = mlir::LLVM::LandingpadOp::create(
+ rewriter, loc, llvmLandingPadStructTy, catchSymAddrs);
+
+ if (op.getCleanup())
+ landingPadOp.setCleanup(true);
+
+ mlir::Value slot =
+ mlir::LLVM::ExtractValueOp::create(rewriter, loc, landingPadOp, 0);
+ mlir::Value selector =
+ mlir::LLVM::ExtractValueOp::create(rewriter, loc, landingPadOp, 1);
+ rewriter.replaceOp(op, mlir::ValueRange{slot, selector});
+
+ // Landing pads are required to be in LLVM functions with personality
+ // attribute.
+ // TODO(cir): for now, hardcode personality creation in order to start
+ // adding exception tests. Once we annotate CIR with such information,
+ // change it to be in FuncOp lowering instead.
+ mlir::OpBuilder::InsertionGuard guard(rewriter);
+ // Insert personality decl before the current function.
+ rewriter.setInsertionPoint(llvmFn);
+ auto personalityFnTy =
+ mlir::LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {},
+ /*isVarArg=*/true);
+
+ const StringRef fnName = "__gxx_personality_v0";
+ createLLVMFuncOpIfNotExist(rewriter, op, fnName, personalityFnTy);
+ llvmFn.setPersonality(fnName);
+
+ return mlir::success();
+}
+
mlir::LogicalResult CIRToLLVMTrapOpLowering::matchAndRewrite(
cir::TrapOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
@@ -3843,6 +4148,12 @@ mlir::LogicalResult CIRToLLVMBlockAddressOpLowering::matchAndRewrite(
return mlir::failure();
}
+mlir::LogicalResult CIRToLLVMAwaitOpLowering::matchAndRewrite(
+ cir::AwaitOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ return mlir::failure();
+}
+
std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass() {
return std::make_unique<ConvertCIRToLLVMPass>();
}
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 0591de545b81d..be6a380372efe 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -12,11 +12,25 @@
#ifndef CLANG_CIR_LOWERTOLLVM_H
#define CLANG_CIR_LOWERTOLLVM_H
+#include "mlir/Conversion/PatternRewriter.h"
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Transforms/DialectConversion.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
+namespace cir {
+class SqrtOp;
+}
+
+class CIRToLLVMSqrtOpLowering : public mlir::OpConversionPattern<cir::SqrtOp> {
+public:
+ using mlir::OpConversionPattern<cir::SqrtOp>::OpConversionPattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const override;
+};
+
namespace cir {
namespace direct {
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
new file mode 100644
index 0000000000000..6e1dace82928c
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
@@ -0,0 +1,46 @@
+// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtss, sqrtsd, etc.)
+// RUN: %clang_cc1 -fcir -triple x86_64-unknown-linux-gnu -O0 %s -emit-cir -o - | FileCheck %s
+
+#include <immintrin.h>
+
+// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
+__m128 test_sqrtps(__m128 x) {
+ return __builtin_ia32_sqrtps(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
+__m256 test_sqrtps256(__m256 x) {
+ return __builtin_ia32_sqrtps256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
+__m512 test_sqrtps512(__m512 x) {
+ return __builtin_ia32_sqrtps512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps512
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
+__m128d test_sqrtpd(__m128d x) {
+ return __builtin_ia32_sqrtpd(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
+__m256d test_sqrtpd256(__m256d x) {
+ return __builtin_ia32_sqrtpd256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
+__m512d test_sqrtpd512(__m512d x) {
+ return __builtin_ia32_sqrtpd512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd512
+// CHECK: cir.sqrt
>From 4a39fd7185cd294b96a4faadc2fa21f2a4d53b6b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Sat, 29 Nov 2025 09:59:40 +0530
Subject: [PATCH 02/27] Implement sqrt builtins for all vector sizes
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 20 ++----
.../test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 67 +++++++++++++++++++
2 files changed, 73 insertions(+), 14 deletions(-)
create mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 45c0de322925a..f8a139ec7a8e0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -786,24 +786,16 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtpd256:
case X86::BI__builtin_ia32_sqrtpd:
case X86::BI__builtin_ia32_sqrtps256:
- case X86::BI__builtin_ia32_sqrtps: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- assert(expr->getNumArgs() == 1 && "__builtin_ia32_sqrtps takes one argument");
- mlir::Value arg = emitScalarExpr(expr->getArg(0));
- mlir::Type argTy = arg.getType();
- if (auto vecTy = argTy.dyn_cast<mlir::VectorType>()) {
- assert(vecTy.getNumElements() == 4 &&
- vecTy.getElementType().isa<mlir::FloatType>() &&
- "__builtin_ia32_sqrtps expects <4 x float> / __m128");
- }
- auto sqrt = cir::SqrtOp::create(builder, loc, argTy, arg);
- return sqrt.getResult();
- }
+ case X86::BI__builtin_ia32_sqrtps:
case X86::BI__builtin_ia32_sqrtph256:
case X86::BI__builtin_ia32_sqrtph:
case X86::BI__builtin_ia32_sqrtph512:
case X86::BI__builtin_ia32_sqrtps512:
- case X86::BI__builtin_ia32_sqrtpd512:
+ case X86::BI__builtin_ia32_sqrtpd512: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ mlir::Value arg = ops[0];
+ return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
+ }
case X86::BI__builtin_ia32_pmuludq128:
case X86::BI__builtin_ia32_pmuludq256:
case X86::BI__builtin_ia32_pmuludq512:
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
new file mode 100644
index 0000000000000..ef5cb954e3efe
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -0,0 +1,67 @@
+// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtph, etc.)
+// RUN: %clang_cc1 -fclangir -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -emit-cir %s -o - | FileCheck %s
+
+#include <immintrin.h>
+
+// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
+__m128 test_sqrtps(__m128 x) {
+ return __builtin_ia32_sqrtps(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
+__m256 test_sqrtps256(__m256 x) {
+ return __builtin_ia32_sqrtps256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
+__m512 test_sqrtps512(__m512 x) {
+ return __builtin_ia32_sqrtps512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps512
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
+__m128d test_sqrtpd(__m128d x) {
+ return __builtin_ia32_sqrtpd(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
+__m256d test_sqrtpd256(__m256d x) {
+ return __builtin_ia32_sqrtpd256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
+__m512d test_sqrtpd512(__m512d x) {
+ return __builtin_ia32_sqrtpd512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd512
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtph - half precision vector sqrt (128-bit)
+__m128h test_sqrtph(__m128h x) {
+ return __builtin_ia32_sqrtph(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtph
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtph256 - half precision vector sqrt (256-bit)
+__m256h test_sqrtph256(__m256h x) {
+ return __builtin_ia32_sqrtph256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtph256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtph512 - half precision vector sqrt (512-bit)
+__m512h test_sqrtph512(__m512h x) {
+ return __builtin_ia32_sqrtph512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtph512
+// CHECK: cir.sqrt
\ No newline at end of file
>From ef3fd9711494e864190932566bcfe46231b95c51 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Sun, 30 Nov 2025 11:44:23 +0530
Subject: [PATCH 03/27] Test file renamed
---
.../CIR/CodeGen/X86/cir-sqrtps-builtins.c | 46 -------------------
1 file changed, 46 deletions(-)
delete mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
deleted file mode 100644
index 6e1dace82928c..0000000000000
--- a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtss, sqrtsd, etc.)
-// RUN: %clang_cc1 -fcir -triple x86_64-unknown-linux-gnu -O0 %s -emit-cir -o - | FileCheck %s
-
-#include <immintrin.h>
-
-// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
-__m128 test_sqrtps(__m128 x) {
- return __builtin_ia32_sqrtps(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
-__m256 test_sqrtps256(__m256 x) {
- return __builtin_ia32_sqrtps256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
-__m512 test_sqrtps512(__m512 x) {
- return __builtin_ia32_sqrtps512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps512
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
-__m128d test_sqrtpd(__m128d x) {
- return __builtin_ia32_sqrtpd(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
-__m256d test_sqrtpd256(__m256d x) {
- return __builtin_ia32_sqrtpd256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
-__m512d test_sqrtpd512(__m512d x) {
- return __builtin_ia32_sqrtpd512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd512
-// CHECK: cir.sqrt
>From 97056731fce0a5e5c2185e16986e0189cec95c7b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Tue, 2 Dec 2025 21:29:57 +0530
Subject: [PATCH 04/27] Add sqrt changes patch
---
my-sqrt-changes.patch | Bin 0 -> 12058 bytes
1 file changed, 0 insertions(+), 0 deletions(-)
create mode 100644 my-sqrt-changes.patch
diff --git a/my-sqrt-changes.patch b/my-sqrt-changes.patch
new file mode 100644
index 0000000000000000000000000000000000000000..87c0ca69ac8abe6aaa684ffbbce3c65e342f6066
GIT binary patch
literal 12058
zcmdU#Z%-V_5yo4}yGZ#CbNCYBz=91N*n~S7WA4rpIa`=G7o8$*fn7{u*uCAw4m$Z$
z<U{2H<$aRp*R at 6WjAxexmdK}(%}h^sRdsd!d8%gr`=6t5656_o(9m_Pr-AO9VKE$r
zxB7dgu|<uZ#pm;I6k3&cTj6=w)z~x5YHCeOGY5LMQG5F$bVENZgg2snP+31c+P?|E
zj`AVHK>v=!tu3i|F2ZT(#J5Fw7#`?a4J*1>g9PmBADXqIw at 2E?gZTD9vmS+IJum2D
zRow**wAax3yVD$8=HgXIgUI_%f6v0X_V+gK<4JfT2^!kLd{_!=x}TH0^ZHll&67B%
zp*4%)%XlI0$LEtq3vtQMw2%h7;@i_M2YS;H_kQGPyPx#`-$zYt4foOrJex$wX4)*@
z72&0L_M&ZeqO}fWOPuPCuGis at x_hbjo$xn}Vt?}*IUk;DPxn55Q#)vChOa)+eJkz*
zYVV{s&bbKhwLjM69JXa4%<k%q?|vbZw+OHF7CLV<V-RhAJ?fhidM689kc7SHQ9JUF
zt*{-w)0%CK4 at 4W!YsVRd=9eYIM(y2cru{1Sm-52zrOlnv>V|k>FK6O_x1Z>~7uLls
z*GA)V?|;_(z3^?e_l8zf_g;5SZda39!rjli%C7cwRwvPlQ=~vw7X5m<!@}Kq>g}Ji
zNpsAjs=kw-RMOn1uP>AJUbGvFlH`glS=gp*_nWXkj3E26yyqI*ku`70diM3Sd-?W-
zyz`Zwz8!kYy=aeU{7f3YJ-sAlo~Md1BcH>Qdh$ZcM^6<M$OxQqZ<v#2E~P>qPTum|
zxb=9_Sx9;9<}~)gMU1MJAFo3P?$5Lj(<AZ9q7r#_QaZ#0G|`Sx5o<?N#2Q9jHaIa_
zo}2R-eF8a-m;@_qipyZ4v$-y1-%VN5(~~?|!$P!<!^=}}9>icnT;%Y at TU-AB7rhN6
z``zI;sG}$R3hxAsHHBp!%Oh80kB`F_dIGDql`j{Rtsd+7K-qd(Z(2fXOS)gtsw%#-
zEOU3*%q4epc2@}3`8dv7)=nR3 at AKL_ST8*<OE-r3$U{U3M|#BNvltVJyqog((-_5y
zhzHC+_v6ThthX88&qOr!gW@@P at jUwGiSSHIxT_U$bx$#DJD!Z8uYUMz_)0u~jwf2^
z4nzYg2OB|kOLY2r?r1$y^dqGM&AJF*M+{^*rw}saME@|P^kMiHOPPy4^r`1ilM-zB
zlQcJoy8R_dZ?aacDBn%J?evHHb?bQ<EvHqojJc9!^flvD7fAm?Z#elGS;Je|gH;?<
zqYdSvPK***6I{;Y3UCA!Ku0>B8QRNSw3ad#cG8y}_e3KhdE at rO&|+-`Rtr9a@{d{p
zj#!8sImaJ0+7SivWU7Y>&BLa<l8S7ZqsLbCfm4l=NgJ`o!0t!w0{VC^nH at SrMx$$J
zGT&V;Ynn&ZYko%DhEpq2V^357cDAhb>z}{+SNOM{n)=tiLi2~3MUTMhD?>5VUDP<^
zDHA)W(wIrD^+tE5iL#DB)kc at yql0lWjn|4{d8%dXCeckAvWP=%n9zK#%e02<L=`y@
zwF}vV?Y6vUs>Nos1R`I*R@;UFutwKpnFgow+o&Uap=yD=BTG`reRlb_EgPY?Q704h
zjAwoVbv)l`<|v-Fw#qlBxKWX3S-H80vaTpEBz_+h4J at +fwrmpSnx_v1_UJ^fG9O51
zh;DPsFbkxbPgd5 at +O~Wy*-w(fGZU74EDr9W*c&>Sjh at Go=f_RSL!R=;^g=6NM{X~~
zA&ooYTo0dT-h2OY&zy&I&+QXlT*lpYeOvxH{Ukx17Ou+1A4kc2cf`u|7#-bTW-GfV
zx){c9=v3)*3`W?5WqY#?v)oTT+q-}LNfv^}t=H64^yQqX5YYH9V=t3c7u44vUa#%8
z<Z0yCZ}ji%a@?N0`nKx%d!g4=MbATNr7N9Oar3k&qj91MQV}nyDGp*au_SM$3%nX5
zCRIf%RuxVC-_rPz<}JlaWAdtEno^IdD5lB7tBEz~Wm!o7p)RV4^t`NRo8{e<b$R&f
zn6u8MCy$GE#$PLf(naxZ1aqkpKrwdWWh)?0cI7Lxaa~{Yj<w#p_XirITVwX}T-}cO
zq5lva$(n{5j4qs>s=nFRRbII-DoyQfNpBpN_Lb-N#h2)cZR|@%=>1((%txoe1G;z3
zqd%5M!m?mKWa+7M_ZR7DRyf)BZ2C@;8YZ=lds&x#txEaB@)XFM>W<owUg*!^!)3*G
zdo6Z#I!p4`)hV6~G^cmYeBGM9gobOZhUTk`T}^2j<P37?$OjB197p}{#F1Im;tk;}
z8*~5iclT>fFZXMkmsd2m<>Qi*_YGTMS7j-`)2`?QfWZ2CcR%h6{6jSQoi1v@$vm6W
z039~v%T|H$rHVLJMR>`QlZ?48@)&1pP1_#NUd0$>HIVDMyf53)tU6Ok at AbSKE4q|C
z8#m*;Fg#D6hmH(8rwG0zADJJ1`CF4kJ{mrKiqSi1A-^-s&K`Zg+YvcACeL?Im$k1m
z-H_#w5vgd`YtZ*sLwmxCQe at A$ru*yJ7<hsX0ND<k+Yn~3Iw#c`^ug%#V!NlZW=^Hf
z?hNbJ`MGB-vj&gGpc(oRbTYu1bQ?DG<a&6O^_$Ruc`^05*%O*fPi{*oTckKs=3mdD
zi2XkFpUX3QnTJhU_)R89I+1c_aar6tcC~BzMqq^G5w_ZUg*@F3D|Cew^7wu;Wc)0m
z5b6Nz#lJ`yZZ5x at 921w<O+vGZhL;sHs8X<@mUj72_YakALBp#$zqamIgq;p`w<cV?
zqOqH!;hV`jDGt6Fb&sEXPgs#p;GrO3f2O-`bLd!GW<1nb9(Bl?{CdVZIk{D0d-U55
zm5ms8ltd5VNYSc{mS;83Jvg6D<w#$nEBR6k>t*!H^57GPy6_Imr*$3l#x?ffQsr?4
ze<xzoOKmHjSl-SNONt^?V8ompuRPb7b#qwhKAp#SliH$UqGcVCDz#keSJ6X<Mit4u
z$72*fi+z{G<9^h5ip*uLI~L%CwxtO}*kJ3r4i;-YUgaIEyi=R{kSR|zG8wEtwmCds
zdf1@#)I;=oY*F85_?^kF-lu$oRL2?_=q$)xd<`9~jqByu at tVs0Wt4qhwNHzd<=!5C
z!5OAK*6%6VPeK`Hjc%I@^2iJ6XFqBaZ_hKTb@;4n?KW!I0<@Z2JsHRCD)Dmro))61
zY8$R)+nh|}taJ2{ETK$kl$EFXP~n=1GVoP~h=F~9IP%jm3#@*}$?ey^3z+6i-yipH
z>!^(SdXW23d8f!<j-2BoxktfW?aU)No<A-UnqJIn^L1#3T~jgJ*BSaCXw1G4*%u#+
z4Aukb3g=MA97XH0-)Q(;A^`b;_24Y4{_u0MkL?plOpf>B)YR;5j8epdb|n*WYGf#4
z3t7*40d4)|`<By1UW*?t)A(PKRy$W{`@Lw8L5NHCJ5s2Ek at ob~f8n>NJD;16PmYz+
z`rGnCo6txd#WhuU52V9YjU5UxeG!oq80xXG)0)QUF+LEoSkiqn)(MmTPT}r!#y%#+
zR at 2p<V=jKd;1#RgJK8dfy%+U7>Ni>Rn$Ly^gw7#P*oplnl(&3Owo#AR`5OB at Q~DiK
z5ytbY;>%~^w5JS<)!N<7X*m*f4DZiGm+~)`t|y~w!+!r}|NB^#_I3B~bjnoATDrML
i>Z$uV<~q4bZj<k6%t_u&-c)?ycV>LO at Ne}=*8c%%jt^@9
literal 0
HcmV?d00001
>From 21119e5ae7529285662c0e9dc6c0024e07a5899b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:19:33 +0530
Subject: [PATCH 05/27] group with other floating point ops
---
clang/include/clang/CIR/Dialect/IR/CIROps.td | 41 ++--------
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 4 +-
.../test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 80 +++++--------------
3 files changed, 31 insertions(+), 94 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 2dc71c68f8a94..dc9e3c6a486d6 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -3275,39 +3275,6 @@ def CIR_InlineAsmOp : CIR_Op<"asm", [RecursiveMemoryEffects]> {
let hasCustomAssemblyFormat = 1;
}
-//===----------------------------------------------------------------------===//
-// SqrtOp
-//===----------------------------------------------------------------------===//
-
-def CIR_SqrtOp : CIR_Op<"sqrt", [Pure]> {
- let summary = "Floating-point square root";
-
- let description = [{
- The `cir.sqrt` operation computes the element-wise square root of its input.
-
- The input must be either:
- • a floating-point scalar type, or
- • a vector whose element type is floating-point.
-
- The result type must match the input type exactly.
-
- Examples:
- // scalar
- %r = cir.sqrt %x : !cir.fp64
-
- // vector
- %v = cir.sqrt %vec : !cir.vector<!cir.fp32 x 4>
- }];
-
- // input and output types: float or vector-of-float
- let arguments = (ins CIR_AnyFloatOrVecOfFloatType:$input);
- let results = (outs CIR_AnyFloatOrVecOfFloatType:$result);
-
- let assemblyFormat = [{
- $input `:` type($input) attr-dict
- }];
-}
-
//===----------------------------------------------------------------------===//
// UnreachableOp
//===----------------------------------------------------------------------===//
@@ -4664,6 +4631,14 @@ def CIR_PtrDiffOp : CIR_Op<"ptr_diff", [Pure, SameTypeOperands]> {
// Floating Point Ops
//===----------------------------------------------------------------------===//
+def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
+ let summary = "Floating-point square root operation";
+
+ let description = [{
+ Computes the square root of a floating-point value or vector.
+ }];
+}
+
class CIR_UnaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
: CIR_Op<mnemonic, [Pure, SameOperandsAndResultType]>
{
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index f8a139ec7a8e0..35ba0f48ce6d8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -781,14 +781,14 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtsh_round_mask:
case X86::BI__builtin_ia32_sqrtsd_round_mask:
case X86::BI__builtin_ia32_sqrtss_round_mask:
- errorNYI("masked round sqrt builtins");
- return {};
case X86::BI__builtin_ia32_sqrtpd256:
case X86::BI__builtin_ia32_sqrtpd:
case X86::BI__builtin_ia32_sqrtps256:
case X86::BI__builtin_ia32_sqrtps:
case X86::BI__builtin_ia32_sqrtph256:
case X86::BI__builtin_ia32_sqrtph:
+ errorNYI("Unimplemented builtin");
+ return {};
case X86::BI__builtin_ia32_sqrtph512:
case X86::BI__builtin_ia32_sqrtps512:
case X86::BI__builtin_ia32_sqrtpd512: {
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
index ef5cb954e3efe..97993cabf0ebf 100644
--- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -1,67 +1,29 @@
-// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtph, etc.)
-// RUN: %clang_cc1 -fclangir -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -emit-cir %s -o - | FileCheck %s
-
#include <immintrin.h>
+// Test X86-specific sqrt builtins
-// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
-__m128 test_sqrtps(__m128 x) {
- return __builtin_ia32_sqrtps(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
-__m256 test_sqrtps256(__m256 x) {
- return __builtin_ia32_sqrtps256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
-__m512 test_sqrtps512(__m512 x) {
- return __builtin_ia32_sqrtps512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps512
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
-__m128d test_sqrtpd(__m128d x) {
- return __builtin_ia32_sqrtpd(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
-__m256d test_sqrtpd256(__m256d x) {
- return __builtin_ia32_sqrtpd256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
-__m512d test_sqrtpd512(__m512d x) {
- return __builtin_ia32_sqrtpd512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd512
-// CHECK: cir.sqrt
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
-// Test __builtin_ia32_sqrtph - half precision vector sqrt (128-bit)
-__m128h test_sqrtph(__m128h x) {
- return __builtin_ia32_sqrtph(x);
+// Test __builtin_ia32_sqrtph512
+__m512h test_sqrtph512(__m512h a) {
+ return __builtin_ia32_sqrtph512(a);
}
-// CHECK-LABEL: cir.func @test_sqrtph
-// CHECK: cir.sqrt
+// CHECK: cir.func @test_sqrtph512
+// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.fp16 x 32>
+// CHECK: cir.return [[RES]]
-// Test __builtin_ia32_sqrtph256 - half precision vector sqrt (256-bit)
-__m256h test_sqrtph256(__m256h x) {
- return __builtin_ia32_sqrtph256(x);
+// Test __builtin_ia32_sqrtps512
+__m512 test_sqrtps512(__m512 a) {
+ return __builtin_ia32_sqrtps512(a);
}
-// CHECK-LABEL: cir.func @test_sqrtph256
-// CHECK: cir.sqrt
+// CHECK: cir.func @test_sqrtps512
+// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.float x 16>
+// CHECK: cir.return [[RES]]
-// Test __builtin_ia32_sqrtph512 - half precision vector sqrt (512-bit)
-__m512h test_sqrtph512(__m512h x) {
- return __builtin_ia32_sqrtph512(x);
+// Test __builtin_ia32_sqrtpd512
+__m512d test_sqrtpd512(__m512d a) {
+ return __builtin_ia32_sqrtpd512(a);
}
-// CHECK-LABEL: cir.func @test_sqrtph512
-// CHECK: cir.sqrt
\ No newline at end of file
+// CHECK: cir.func @test_sqrtpd512
+// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.double x 8>
+// CHECK: cir.return [[RES]]
\ No newline at end of file
>From 90878ec8d8d6b5b46286c419c4187f01215b6e4b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:25:20 +0530
Subject: [PATCH 06/27] place the implementation with other floating point ops
---
clang/include/clang/CIR/Dialect/IR/CIROps.td | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index dc9e3c6a486d6..fa10848f4397a 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4631,14 +4631,6 @@ def CIR_PtrDiffOp : CIR_Op<"ptr_diff", [Pure, SameTypeOperands]> {
// Floating Point Ops
//===----------------------------------------------------------------------===//
-def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
- let summary = "Floating-point square root operation";
-
- let description = [{
- Computes the square root of a floating-point value or vector.
- }];
-}
-
class CIR_UnaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
: CIR_Op<mnemonic, [Pure, SameOperandsAndResultType]>
{
@@ -4650,6 +4642,14 @@ class CIR_UnaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
let llvmOp = llvmOpName;
}
+def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
+ let summary = "Floating-point square root operation";
+
+ let description = [{
+ Computes the square root of a floating-point value or vector.
+ }];
+}
+
def CIR_ACosOp : CIR_UnaryFPToFPBuiltinOp<"acos", "ACosOp"> {
let summary = "Computes the arcus cosine of the specified value";
let description = [{
>From 3529f40b48025f543a2b3ced9d6aa63a2241283f Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:30:58 +0530
Subject: [PATCH 07/27] Update
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5514a4cd0876d..709e3026e51f1 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -45,30 +45,30 @@
using namespace cir;
using namespace llvm;
-using namespace mlir;
-static std::string getLLVMIntrinsicNameForType(Type llvmTy) {
+
+static std::string getLLVMIntrinsicNameForType(mlir::Type llvmTy) {
std::string s;
{
llvm::raw_string_ostream os(s);
llvm::Type *unused = nullptr;
os << llvmTy;
}
- if (auto vecTy = llvmTy.dyn_cast<LLVM::LLVMType>()) {
+ if (auto vecTy = llvmTy.dyn_cast<mlir::LLVM::LLVMType>()) {
}
return s;
}
// Actual lowering
-LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
+mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
- ConversionPatternRewriter &rewriter) const {
+ mlir::ConversionPatternRewriter &rewriter) const {
- Location loc = op.getLoc();
- MLIRContext *ctx = rewriter.getContext();
+ mlir::Location loc = op.getLoc();
+ mlir::MLIRContext *ctx = rewriter.getContext();
- Type cirResTy = op.getResult().getType();
- Type llvmResTy = getTypeConverter()->convertType(cirResTy);
+ mlir::Type cirResTy = op.getResult().getType();
+ mlir::Type llvmResTy = getTypeConverter()->convertType(cirResTy);
if (!llvmResTy)
return op.emitOpError(
"expected LLVM dialect result type for cir.sqrt lowering");
>From 92d0ac3ed203e38e244c0afabb5f3524d1772645 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:44:10 +0530
Subject: [PATCH 08/27] Update clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 35ba0f48ce6d8..eb9ac260f225d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -794,7 +794,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtpd512: {
mlir::Location loc = getLoc(expr->getExprLoc());
mlir::Value arg = ops[0];
- return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
+ return builder.create<cir::SqrtOp>(loc, arg.getType(), arg).getResult();
}
case X86::BI__builtin_ia32_pmuludq128:
case X86::BI__builtin_ia32_pmuludq256:
>From 0385662da0847396b4096ddad2c90bcf1c593d0f Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:45:45 +0530
Subject: [PATCH 09/27] Update
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 709e3026e51f1..a80103764a60a 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -51,7 +51,6 @@ static std::string getLLVMIntrinsicNameForType(mlir::Type llvmTy) {
std::string s;
{
llvm::raw_string_ostream os(s);
- llvm::Type *unused = nullptr;
os << llvmTy;
}
if (auto vecTy = llvmTy.dyn_cast<mlir::LLVM::LLVMType>()) {
>From ddcb7b8476e796d9945bbde26a39e567853da34e Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:51:05 +0530
Subject: [PATCH 10/27] update
clang\lib\CIR\Lowering\DirectToLLVM\LowerToLLVM.cpp
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5514a4cd0876d..c17980f7ffbf7 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -54,8 +54,6 @@ static std::string getLLVMIntrinsicNameForType(Type llvmTy) {
llvm::Type *unused = nullptr;
os << llvmTy;
}
- if (auto vecTy = llvmTy.dyn_cast<LLVM::LLVMType>()) {
- }
return s;
}
>From 233efad67b9677f6e77034e6868905070708765a Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:56:02 +0530
Subject: [PATCH 11/27] Update
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index be6a380372efe..1f69b7d66f25e 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -12,7 +12,6 @@
#ifndef CLANG_CIR_LOWERTOLLVM_H
#define CLANG_CIR_LOWERTOLLVM_H
-#include "mlir/Conversion/PatternRewriter.h"
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Transforms/DialectConversion.h"
>From 9d940bc80e60470e6f5dcc82d74e45dd361acdc2 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:57:44 +0530
Subject: [PATCH 12/27] Update
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index ebb41e42a2871..5dfef939126d0 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -120,8 +120,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
if (!module.lookupSymbol<LLVM::LLVMFuncOp>(intrinsicName)) {
OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPointToStart(module.getBody());
- auto llvmFnType = LLVM::LLVMType::getFunctionTy(llvmResTy, {llvmResTy},
- /*isVarArg=*/false);
+ auto llvmFnType = LLVM::LLVMFunctionType::get(ctx, llvmResTy, {llvmResTy},
+ /*isVarArg=*/false);
rewriter.create<LLVM::LLVMFuncOp>(loc, intrinsicName, llvmFnType);
}
>From 51bbccad4f784a4c44d6562ccef36caaf2f1b521 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 16:18:27 +0530
Subject: [PATCH 13/27] Remove BOM character
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index ebb41e42a2871..0395f905c866b 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//===-- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
>From e5789b65fc43637493e07979a9ac56dfd9cbee37 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 16:29:05 +0530
Subject: [PATCH 14/27] Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5dfef939126d0..11f042737d658 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
>From 8937b12959c7a4336b6857c1a57b54e6c99d5457 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 16:31:14 +0530
Subject: [PATCH 15/27] Apply suggestion from @andykaylor
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index eb9ac260f225d..35ba0f48ce6d8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -794,7 +794,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtpd512: {
mlir::Location loc = getLoc(expr->getExprLoc());
mlir::Value arg = ops[0];
- return builder.create<cir::SqrtOp>(loc, arg.getType(), arg).getResult();
+ return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
}
case X86::BI__builtin_ia32_pmuludq128:
case X86::BI__builtin_ia32_pmuludq256:
>From 8a02c504acf42c81bd0c53df89e296480b74c05b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 16:44:08 +0530
Subject: [PATCH 16/27] add description
---
clang/include/clang/CIR/Dialect/IR/CIROps.td | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index fa10848f4397a..06eb7d6689362 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4647,6 +4647,19 @@ def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
let description = [{
Computes the square root of a floating-point value or vector.
+
+ The input must be either:
+ • a floating-point scalar type, or
+ • a vector whose element type is floating-point.
+
+ The result type must match the input type exactly.
+
+ Examples:
+ // scalar
+ %r = cir.sqrt %x : !cir.fp64
+
+ // vector
+ %v = cir.sqrt %vec : !cir.vector<!cir.fp32 x 4>
}];
}
>From 82a9395517d79c79653194939c107234e1628d05 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 17:17:40 +0530
Subject: [PATCH 17/27] Remove undefined sqrt builtin cases
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 6 ------
1 file changed, 6 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 35ba0f48ce6d8..0b796e4e3a860 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -781,12 +781,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtsh_round_mask:
case X86::BI__builtin_ia32_sqrtsd_round_mask:
case X86::BI__builtin_ia32_sqrtss_round_mask:
- case X86::BI__builtin_ia32_sqrtpd256:
- case X86::BI__builtin_ia32_sqrtpd:
- case X86::BI__builtin_ia32_sqrtps256:
- case X86::BI__builtin_ia32_sqrtps:
- case X86::BI__builtin_ia32_sqrtph256:
- case X86::BI__builtin_ia32_sqrtph:
errorNYI("Unimplemented builtin");
return {};
case X86::BI__builtin_ia32_sqrtph512:
>From 6bd328210bcd68abe14e36895f6d587a54b99ed2 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 17:25:40 +0530
Subject: [PATCH 18/27] Remove unused getLLVMIntrinsicNameForType function
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5dfef939126d0..8b8b756a7f691 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -46,16 +46,6 @@
using namespace cir;
using namespace llvm;
-
-static std::string getLLVMIntrinsicNameForType(mlir::Type llvmTy) {
- std::string s;
- {
- llvm::raw_string_ostream os(s);
- os << llvmTy;
- }
- return s;
-}
-
// Actual lowering
mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
>From 8232ce8a4de0e8c179d42739b354695987be458f Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 17:37:11 +0530
Subject: [PATCH 19/27] Removed braces
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 8b8b756a7f691..4cbea38a30c50 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -62,9 +62,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
Value operand = adaptor.getInput();
Value llvmOperand = operand;
- if (operand.getType() != llvmResTy) {
+ if (operand.getType() != llvmResTy)
llvmOperand = rewriter.create<LLVM::BitcastOp>(loc, llvmResTy, operand);
- }
// Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result
std::string intrinsicName = "llvm.sqrt.";
@@ -84,9 +83,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
suffix = "v" + std::to_string(n) + "f16";
else
return op.emitOpError("unsupported float width for sqrt");
- } else {
+ } else
return op.emitOpError("vector element must be floating point for sqrt");
- }
} else if (auto f = cirResTy.dyn_cast<cir::FloatType>()) {
// Scalar float
unsigned width = f.getWidth();
@@ -98,9 +96,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
suffix = "f16";
else
return op.emitOpError("unsupported float width for sqrt");
- } else {
+ } else
return op.emitOpError("unsupported type for cir.sqrt lowering");
- }
intrinsicName += suffix;
>From bc8e4ccfc22731aaee790659d8b5072ab36be7a7 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 17:39:45 +0530
Subject: [PATCH 20/27] Update
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 74 +------------------
1 file changed, 4 insertions(+), 70 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 4cbea38a30c50..e7a6e8677569a 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -47,77 +47,11 @@ using namespace cir;
using namespace llvm;
// Actual lowering
-mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
- cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
- mlir::ConversionPatternRewriter &rewriter) const {
-
- mlir::Location loc = op.getLoc();
- mlir::MLIRContext *ctx = rewriter.getContext();
-
- mlir::Type cirResTy = op.getResult().getType();
- mlir::Type llvmResTy = getTypeConverter()->convertType(cirResTy);
- if (!llvmResTy)
- return op.emitOpError(
- "expected LLVM dialect result type for cir.sqrt lowering");
-
- Value operand = adaptor.getInput();
- Value llvmOperand = operand;
- if (operand.getType() != llvmResTy)
- llvmOperand = rewriter.create<LLVM::BitcastOp>(loc, llvmResTy, operand);
-
- // Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result
- std::string intrinsicName = "llvm.sqrt.";
- std::string suffix;
-
- // If the CIR result type is a vector, include the 'vN' part in the suffix.
- if (auto vec = cirResTy.dyn_cast<cir::VectorType>()) {
- Type elt = vec.getElementType();
- if (auto f = elt.dyn_cast<cir::FloatType>()) {
- unsigned width = f.getWidth();
- unsigned n = vec.getNumElements();
- if (width == 32)
- suffix = "v" + std::to_string(n) + "f32";
- else if (width == 64)
- suffix = "v" + std::to_string(n) + "f64";
- else if (width == 16)
- suffix = "v" + std::to_string(n) + "f16";
- else
- return op.emitOpError("unsupported float width for sqrt");
- } else
- return op.emitOpError("vector element must be floating point for sqrt");
- } else if (auto f = cirResTy.dyn_cast<cir::FloatType>()) {
- // Scalar float
- unsigned width = f.getWidth();
- if (width == 32)
- suffix = "f32";
- else if (width == 64)
- suffix = "f64";
- else if (width == 16)
- suffix = "f16";
- else
- return op.emitOpError("unsupported float width for sqrt");
- } else
- return op.emitOpError("unsupported type for cir.sqrt lowering");
-
- intrinsicName += suffix;
-
- // Ensure the llvm intrinsic function exists at module scope. Insert it at
- // the start of the module body using an insertion guard.
- ModuleOp module = op->getParentOfType<ModuleOp>();
- if (!module.lookupSymbol<LLVM::LLVMFuncOp>(intrinsicName)) {
- OpBuilder::InsertionGuard guard(rewriter);
- rewriter.setInsertionPointToStart(module.getBody());
- auto llvmFnType = LLVM::LLVMFunctionType::get(ctx, llvmResTy, {llvmResTy},
- /*isVarArg=*/false);
- rewriter.create<LLVM::LLVMFuncOp>(loc, intrinsicName, llvmFnType);
- }
-
- // Create the call and replace cir.sqrt
- auto callee = SymbolRefAttr::get(ctx, intrinsicName);
- rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, llvmResTy, callee,
- ArrayRef<Value>{llvmOperand});
-
+ mlir::Type resTy = typeConverter->convertType(op.getType());
+ rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy,
+ adaptor.getSrc());
return mlir::success();
+
}
namespace cir {
>From 92847619f4b000f6dcefe307543dcf6e7b917a14 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 19:37:22 +0530
Subject: [PATCH 21/27] update
clang\lib\CIR\Lowering\DirectToLLVM\LowerToLLVM.h
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 13 -------------
1 file changed, 13 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 1f69b7d66f25e..0591de545b81d 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -17,19 +17,6 @@
#include "mlir/Transforms/DialectConversion.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
-namespace cir {
-class SqrtOp;
-}
-
-class CIRToLLVMSqrtOpLowering : public mlir::OpConversionPattern<cir::SqrtOp> {
-public:
- using mlir::OpConversionPattern<cir::SqrtOp>::OpConversionPattern;
-
- mlir::LogicalResult
- matchAndRewrite(cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
- mlir::ConversionPatternRewriter &rewriter) const override;
-};
-
namespace cir {
namespace direct {
>From 8647b5c719a7d91c3dbd3954b022621c3b550aaf Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 19:41:21 +0530
Subject: [PATCH 22/27] Update clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
index 97993cabf0ebf..bf496f2ea733d 100644
--- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -2,7 +2,11 @@
// Test X86-specific sqrt builtins
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
-// RUN: FileCheck --input-file=%t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
// Test __builtin_ia32_sqrtph512
__m512h test_sqrtph512(__m512h a) {
>From 4bac65a58020456624d39efb64f27d1301c4bb23 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 19:48:11 +0530
Subject: [PATCH 23/27] Update test
---
.../test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 30 +++++++++++++------
1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
index bf496f2ea733d..a3de192f9e142 100644
--- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -12,22 +12,34 @@
__m512h test_sqrtph512(__m512h a) {
return __builtin_ia32_sqrtph512(a);
}
-// CHECK: cir.func @test_sqrtph512
-// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.fp16 x 32>
-// CHECK: cir.return [[RES]]
+// CIR: cir.func @test_sqrtph512
+// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.fp16 x 32>
+// CIR: cir.return [[RES]]
+// LLVM: define {{.*}} @test_sqrtph512
+// LLVM: call <32 x half> @llvm.sqrt.v32f16
+// OGCG: define {{.*}} @test_sqrtph512
+// OGCG: call <32 x half> @llvm.sqrt.v32f16
// Test __builtin_ia32_sqrtps512
__m512 test_sqrtps512(__m512 a) {
return __builtin_ia32_sqrtps512(a);
}
-// CHECK: cir.func @test_sqrtps512
-// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.float x 16>
-// CHECK: cir.return [[RES]]
+// CIR: cir.func @test_sqrtps512
+// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.float x 16>
+// CIR: cir.return [[RES]]
+// LLVM: define {{.*}} @test_sqrtps512
+// LLVM: call <16 x float> @llvm.sqrt.v16f32
+// OGCG: define {{.*}} @test_sqrtps512
+// OGCG: call <16 x float> @llvm.sqrt.v16f32
// Test __builtin_ia32_sqrtpd512
__m512d test_sqrtpd512(__m512d a) {
return __builtin_ia32_sqrtpd512(a);
}
-// CHECK: cir.func @test_sqrtpd512
-// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.double x 8>
-// CHECK: cir.return [[RES]]
\ No newline at end of file
+// CIR: cir.func @test_sqrtpd512
+// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.double x 8>
+// CIR: cir.return [[RES]]
+// LLVM: define {{.*}} @test_sqrtpd512
+// LLVM: call <8 x double> @llvm.sqrt.v8f64
+// OGCG: define {{.*}} @test_sqrtpd512
+// OGCG: call <8 x double> @llvm.sqrt.v8f64
\ No newline at end of file
>From b1ff2abd50b55470361f721053fc72a9080c20d6 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 20:25:55 +0530
Subject: [PATCH 24/27] update
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index e7a6e8677569a..846fc5c07f798 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
>From 884300615cd4900e44af48016cd895005821e41f Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Fri, 5 Dec 2025 19:06:33 +0530
Subject: [PATCH 25/27] Remove unused include
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 846fc5c07f798..08573c0ae83bb 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -30,7 +30,6 @@
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "mlir/Transforms/DialectConversion.h"
-#include "clang/Basic/LLVM.h"
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
>From ed82423252e2b452efc4d3265166c08e797b259e Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Fri, 5 Dec 2025 19:19:21 +0530
Subject: [PATCH 26/27] Move sqrt lowering with other floating point operations
---
.../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 08573c0ae83bb..34a1ca3f10c01 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -45,14 +45,6 @@
using namespace cir;
using namespace llvm;
-// Actual lowering
- mlir::Type resTy = typeConverter->convertType(op.getType());
- rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy,
- adaptor.getSrc());
- return mlir::success();
-
-}
-
namespace cir {
namespace direct {
@@ -194,6 +186,14 @@ mlir::LogicalResult CIRToLLVMCopyOpLowering::matchAndRewrite(
return mlir::success();
}
+mlir::LogicalResult SqrtOpLowering::matchAndRewrite(
+ cir::SqrtOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ mlir::Type resTy = typeConverter->convertType(op.getType());
+ rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy, adaptor.getSrc());
+ return mlir::success();
+}
+
mlir::LogicalResult CIRToLLVMCosOpLowering::matchAndRewrite(
cir::CosOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
>From 961c9f95a70b1368b2e33adeae0aa63fd2b9ae8c Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Sat, 6 Dec 2025 09:31:10 +0530
Subject: [PATCH 27/27] Remove BOM character
---
clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 34a1ca3f10c01..ffaebdcd9f062 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
More information about the llvm-commits mailing list