[clang] [llvm] [CIR][X86] Implement lowering for sqrt builtins (PR #169310)

Fri Dec 5 20:12:51 PST 2025

https://github.com/Priyanshu3820 updated https://github.com/llvm/llvm-project/pull/169310

>From 627bcb3bde64a780ed2b9aaaa9267d97c9679f9c Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Wed, 26 Nov 2025 17:45:00 +0530
Subject: [PATCH 01/27] Add CIR sqrt builtin support for X86

---
 clang/include/clang/CIR/Dialect/IR/CIROps.td  | 344 +++++++++++++++++-
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  66 +++-
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 323 +++++++++++++++-
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h   |  14 +
 .../CIR/CodeGen/X86/cir-sqrtps-builtins.c     |  46 +++
 5 files changed, 772 insertions(+), 21 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index e612d6a0ba886..291b035e6204c 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -802,8 +802,8 @@ def CIR_ConditionOp : CIR_Op<"condition", [
 //===----------------------------------------------------------------------===//
 
 defvar CIR_YieldableScopes = [
-  "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "GlobalOp", "IfOp",
-  "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp", "TryOp"
+  "ArrayCtor", "ArrayDtor", "AwaitOp", "CaseOp", "DoWhileOp", "ForOp",
+  "GlobalOp", "IfOp", "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp", "TryOp"
 ];
 
 def CIR_YieldOp : CIR_Op<"yield", [
@@ -1640,6 +1640,82 @@ def CIR_CmpOp : CIR_Op<"cmp", [Pure, SameTypeOperands]> {
   let isLLVMLoweringRecursive = true;
 }
 
+//===----------------------------------------------------------------------===//
+// BinOpOverflowOp
+//===----------------------------------------------------------------------===//
+
+def CIR_BinOpOverflowKind : CIR_I32EnumAttr<
+  "BinOpOverflowKind", "checked binary arithmetic operation kind", [
+    I32EnumAttrCase<"Add", 0, "add">,
+    I32EnumAttrCase<"Sub", 1, "sub">,
+    I32EnumAttrCase<"Mul", 2, "mul">
+]>;
+
+def CIR_BinOpOverflowOp : CIR_Op<"binop.overflow", [Pure, SameTypeOperands]> {
+  let summary = "Perform binary integral arithmetic with overflow checking";
+  let description = [{
+    `cir.binop.overflow` performs binary arithmetic operations with overflow
+    checking on integral operands.
+
+    The `kind` argument specifies the kind of arithmetic operation to perform.
+    It can be either `add`, `sub`, or `mul`. The `lhs` and `rhs` arguments
+    specify the input operands of the arithmetic operation. The types of `lhs`
+    and `rhs` must be the same.
+
+    `cir.binop.overflow` produces two SSA values. `result` is the result of the
+    arithmetic operation truncated to its specified type. `overflow` is a
+    boolean value indicating whether overflow happens during the operation.
+
+    The exact semantics of this operation are as follows:
+
+      - `lhs` and `rhs` are promoted to an imaginary integral type that has
+        infinite precision.
+      - The arithmetic operation is performed on the promoted operands.
+      - The infinite-precision result is truncated to the type of `result`. The
+        truncated result is assigned to `result`.
+      - If the truncated result is equal to the un-truncated result, `overflow`
+        is assigned to false. Otherwise, `overflow` is assigned to true.
+  }];
+
+  let arguments = (ins
+    CIR_BinOpOverflowKind:$kind,
+    CIR_IntType:$lhs,
+    CIR_IntType:$rhs
+  );
+
+  let results = (outs CIR_IntType:$result, CIR_BoolType:$overflow);
+
+  let assemblyFormat = [{
+    `(` $kind `,` $lhs `,` $rhs `)` `:` qualified(type($lhs)) `,`
+    `(` qualified(type($result)) `,` qualified(type($overflow)) `)`
+    attr-dict
+  }];
+
+  let builders = [
+    OpBuilder<(ins "cir::IntType":$resultTy,
+                   "cir::BinOpOverflowKind":$kind,
+                   "mlir::Value":$lhs,
+                   "mlir::Value":$rhs), [{
+      auto overflowTy = cir::BoolType::get($_builder.getContext());
+      build($_builder, $_state, resultTy, overflowTy, kind, lhs, rhs);
+    }]>
+  ];
+
+  let extraLLVMLoweringPatternDecl = [{
+    static std::string getLLVMIntrinName(cir::BinOpOverflowKind opKind,
+                                         bool isSigned, unsigned width);
+
+    struct EncompassedTypeInfo {
+      bool sign;
+      unsigned width;
+    };
+
+    static EncompassedTypeInfo computeEncompassedTypeWidth(cir::IntType operandTy,
+                                                           cir::IntType resultTy);
+  }];
+}
+
+
 //===----------------------------------------------------------------------===//
 // BinOp
 //===----------------------------------------------------------------------===//
@@ -2533,7 +2609,9 @@ def CIR_FuncOp : CIR_Op<"func", [
                        OptionalAttr<DictArrayAttr>:$res_attrs,
                        OptionalAttr<FlatSymbolRefAttr>:$aliasee,
                        CIR_OptionalPriorityAttr:$global_ctor_priority,
-                       CIR_OptionalPriorityAttr:$global_dtor_priority);
+                       CIR_OptionalPriorityAttr:$global_dtor_priority,
+                       OptionalAttr<CIR_CXXSpecialMemberAttr>:$cxx_special_member
+   );
 
   let regions = (region AnyRegion:$body);
 
@@ -2572,7 +2650,32 @@ def CIR_FuncOp : CIR_Op<"func", [
     //===------------------------------------------------------------------===//
 
     bool isDeclaration();
-  }];
+
+    //===------------------------------------------------------------------===//
+    // C++ Special Member Functions
+    //===------------------------------------------------------------------===//
+
+    /// Returns true if this function is a C++ special member function.
+    bool isCXXSpecialMemberFunction();
+
+    bool isCxxConstructor();
+    bool isCxxDestructor();
+
+    /// Returns true if this function is a copy or move assignment operator.
+    bool isCxxSpecialAssignment();
+
+    /// Returns the kind of constructor this function represents, if any.
+    std::optional<CtorKind> getCxxConstructorKind();
+
+    /// Returns the kind of assignment operator (move, copy) this function
+    /// represents, if any.
+    std::optional<AssignKind> getCxxSpecialAssignKind();
+
+    /// Returns true if the function is a trivial C++ member function such as
+    /// trivial default constructor, copy/move constructor, copy/move assignment,
+    /// or destructor.
+    bool isCxxTrivialMemberFunction();
+}];
 
   let hasCustomAssemblyFormat = 1;
   let hasVerifier = 1;
@@ -2752,6 +2855,100 @@ def CIR_CallOp : CIR_CallOpBase<"call", [NoRegionArguments]> {
   ];
 }
 
+//===----------------------------------------------------------------------===//
+// AwaitOp
+//===----------------------------------------------------------------------===//
+
+def CIR_AwaitKind : CIR_I32EnumAttr<"AwaitKind", "await kind", [
+  I32EnumAttrCase<"Init", 0, "init">,
+  I32EnumAttrCase<"User", 1, "user">,
+  I32EnumAttrCase<"Yield", 2, "yield">,
+  I32EnumAttrCase<"Final", 3, "final">
+]>;
+
+def CIR_AwaitOp : CIR_Op<"await",[
+  DeclareOpInterfaceMethods<RegionBranchOpInterface>,
+  RecursivelySpeculatable, NoRegionArguments
+]> {
+  let summary = "Wraps C++ co_await implicit logic";
+  let description = [{
+    The under the hood effect of using C++ `co_await expr` roughly
+    translates to:
+
+    ```c++
+    // co_await expr;
+
+    auto &&x = CommonExpr();
+    if (!x.await_ready()) {
+       ...
+       x.await_suspend(...);
+       ...
+    }
+    x.await_resume();
+    ```
+
+    `cir.await` represents this logic by using 3 regions:
+      - ready: covers veto power from x.await_ready()
+      - suspend: wraps actual x.await_suspend() logic
+      - resume: handles x.await_resume()
+
+    Breaking this up in regions allows individual scrutiny of conditions
+    which might lead to folding some of them out. Lowerings coming out
+    of CIR, e.g. LLVM, should use the `suspend` region to track more
+    lower level codegen (e.g. intrinsic emission for coro.save/coro.suspend).
+
+    There are also 4 flavors of `cir.await` available:
+    - `init`: compiler generated initial suspend via implicit `co_await`.
+    - `user`: also known as normal, representing a user written `co_await`.
+    - `yield`: user written `co_yield` expressions.
+    - `final`: compiler generated final suspend via implicit `co_await`.
+
+    ```mlir
+      cir.scope {
+        ... // auto &&x = CommonExpr();
+        cir.await(user, ready : {
+          ... // x.await_ready()
+        }, suspend : {
+          ... // x.await_suspend()
+        }, resume : {
+          ... // x.await_resume()
+        })
+      }
+    ```
+
+    Note that resolution of the common expression is assumed to happen
+    as part of the enclosing await scope.
+  }];
+
+  let arguments = (ins CIR_AwaitKind:$kind);
+  let regions = (region SizedRegion<1>:$ready,
+                        SizedRegion<1>:$suspend,
+                        SizedRegion<1>:$resume);
+  let assemblyFormat = [{
+    `(` $kind `,`
+    `ready` `:` $ready `,`
+    `suspend` `:` $suspend `,`
+    `resume` `:` $resume `,`
+    `)`
+    attr-dict
+  }];
+
+  let skipDefaultBuilders = 1;
+  let builders = [
+    OpBuilder<(ins
+      "cir::AwaitKind":$kind,
+      CArg<"BuilderCallbackRef",
+           "nullptr">:$readyBuilder,
+      CArg<"BuilderCallbackRef",
+           "nullptr">:$suspendBuilder,
+      CArg<"BuilderCallbackRef",
+           "nullptr">:$resumeBuilder
+      )>
+  ];
+
+  let hasVerifier = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // CopyOp
 //===----------------------------------------------------------------------===//
@@ -2988,6 +3185,39 @@ def CIR_InlineAsmOp : CIR_Op<"asm", [RecursiveMemoryEffects]> {
   let hasCustomAssemblyFormat = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// SqrtOp
+//===----------------------------------------------------------------------===//
+
+def CIR_SqrtOp : CIR_Op<"sqrt", [Pure]> {
+  let summary = "Floating-point square root";
+
+  let description = [{
+    The `cir.sqrt` operation computes the element-wise square root of its input.
+
+    The input must be either:
+      • a floating-point scalar type, or
+      • a vector whose element type is floating-point.
+
+    The result type must match the input type exactly.
+
+    Examples:
+      // scalar
+      %r = cir.sqrt %x : !cir.fp64
+
+      // vector
+      %v = cir.sqrt %vec : !cir.vector<!cir.fp32 x 4>
+  }];
+
+  // input and output types: float or vector-of-float
+  let arguments = (ins CIR_AnyFloatOrVecOfFloatType:$input);
+  let results   = (outs CIR_AnyFloatOrVecOfFloatType:$result);
+
+  let assemblyFormat = [{
+    $input `:` type($input) attr-dict
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // UnreachableOp
 //===----------------------------------------------------------------------===//
@@ -4018,6 +4248,72 @@ def CIR_RotateOp : CIR_Op<"rotate", [Pure, SameOperandsAndResultType]> {
   let hasFolder = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// FPClass Test Flags
+//===----------------------------------------------------------------------===//
+
+def FPClassTestEnum : CIR_I32EnumAttr<"FPClassTest", "floating-point class test flags", [
+  // Basic flags
+  I32EnumAttrCase<"SignalingNaN", 1, "fcSNan">,
+  I32EnumAttrCase<"QuietNaN", 2, "fcQNan">,
+  I32EnumAttrCase<"NegativeInfinity", 4, "fcNegInf">,
+  I32EnumAttrCase<"NegativeNormal", 8, "fcNegNormal">,
+  I32EnumAttrCase<"NegativeSubnormal", 16, "fcNegSubnormal">,
+  I32EnumAttrCase<"NegativeZero", 32, "fcNegZero">,
+  I32EnumAttrCase<"PositiveZero", 64, "fcPosZero">,
+  I32EnumAttrCase<"PositiveSubnormal", 128, "fcPosSubnormal">,
+  I32EnumAttrCase<"PositiveNormal", 256, "fcPosNormal">,
+  I32EnumAttrCase<"PositiveInfinity", 512, "fcPosInf">,
+
+  // Composite flags
+  I32EnumAttrCase<"Nan", 3, "fcNan">,                   // fcSNan | fcQNan
+  I32EnumAttrCase<"Infinity", 516, "fcInf">,            // fcPosInf | fcNegInf
+  I32EnumAttrCase<"Normal", 264, "fcNormal">,           // fcPosNormal | fcNegNormal
+  I32EnumAttrCase<"Subnormal", 144, "fcSubnormal">,     // fcPosSubnormal | fcNegSubnormal
+  I32EnumAttrCase<"Zero", 96, "fcZero">,                // fcPosZero | fcNegZero
+  I32EnumAttrCase<"PositiveFinite", 448, "fcPosFinite">,// fcPosNormal | fcPosSubnormal | fcPosZero
+  I32EnumAttrCase<"NegativeFinite", 56, "fcNegFinite">, // fcNegNormal | fcNegSubnormal | fcNegZero
+  I32EnumAttrCase<"Finite", 504, "fcFinite">,           // fcPosFinite | fcNegFinite
+  I32EnumAttrCase<"Positive", 960, "fcPositive">,       // fcPosFinite | fcPosInf
+  I32EnumAttrCase<"Negative", 60, "fcNegative">,        // fcNegFinite | fcNegInf
+  I32EnumAttrCase<"All", 1023, "fcAllFlags">,           // fcNan | fcInf | fcFinite
+]> {
+  let cppNamespace = "::cir";
+}
+
+def CIR_IsFPClassOp : CIR_Op<"is_fp_class"> {
+  let summary = "Corresponding to the `__builtin_isfpclass` builtin function in clang";
+
+  let description = [{
+    The `cir.is_fp_class` operation takes a floating-point value as its first
+    argument and a bitfield of flags as its second argument. The operation
+    returns a boolean value indicating whether the floating-point value
+    satisfies the given flags.
+
+    The flags must be a compile time constant and the values are:
+
+    | Bit # | floating-point class |
+    | ----- | -------------------- |
+    |  0    | Signaling NaN        |
+    |  1    | Quiet NaN            |
+    |  2    | Negative infinity    |
+    |  3    | Negative normal      |
+    |  4    | Negative subnormal   |
+    |  5    | Negative zero        |
+    |  6    | Positive zero        |
+    |  7    | Positive subnormal   |
+    |  8    | Positive normal      |
+    |  9    | Positive infinity    |
+  }];
+
+  let arguments = (ins CIR_AnyFloatType:$src,
+                       FPClassTestEnum:$flags);
+  let results = (outs CIR_BoolType:$result);
+  let assemblyFormat = [{
+    $src `,` $flags `:` functional-type($src, $result) attr-dict
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // Assume Operations
 //===----------------------------------------------------------------------===//
@@ -4202,7 +4498,7 @@ def CIR_ObjSizeOp : CIR_Op<"objsize", [Pure]> {
     When the `min` attribute is present, the operation returns the minimum
     guaranteed accessible size. When absent (max mode), it returns the maximum
     possible object size. Corresponds to `llvm.objectsize`'s `min` argument.
-    
+
     The `dynamic` attribute determines if the value should be evaluated at
     runtime. Corresponds to `llvm.objectsize`'s `dynamic` argument.
 
@@ -4658,6 +4954,44 @@ def CIR_TryOp : CIR_Op<"try",[
   let hasLLVMLowering = false;
 }
 
+//===----------------------------------------------------------------------===//
+// Exception related: EhInflightOp
+//===----------------------------------------------------------------------===//
+
+def CIR_EhInflightOp : CIR_Op<"eh.inflight_exception"> {
+  let summary = "Materialize the catch clause formal parameter";
+  let description = [{
+    `cir.eh.inflight_exception` returns two values:
+      - `exception_ptr`: The exception pointer for the inflight exception
+      - `type_id`: the type info index for the exception type
+    This operation is expected to be the first operation in the unwind
+    destination basic blocks of a `cir.try_call` operation.
+
+    The `cleanup` attribute indicates that clean up code must be run before the
+    values produced by this operation are used to dispatch the exception. This
+    cleanup code must be executed even if the exception is not caught.
+    This helps CIR to pass down more accurate information for LLVM lowering
+    to landingpads.
+
+    Example:
+
+    ```mlir
+    %exception_ptr, %type_id = cir.eh.inflight_exception
+    %exception_ptr, %type_id = cir.eh.inflight_exception [@_ZTIi, @_ZTIPKc]
+    %exception_ptr, %type_id = cir.eh.inflight_exception cleanup
+    ```
+  }];
+
+  let arguments = (ins UnitAttr:$cleanup,
+                       OptionalAttr<FlatSymbolRefArrayAttr>:$catch_type_list);
+  let results = (outs CIR_VoidPtrType:$exception_ptr, CIR_UInt32:$type_id);
+  let assemblyFormat = [{
+    (`cleanup` $cleanup^)?
+    ($catch_type_list^)?
+    attr-dict
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // Atomic operations
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index ee6900141647f..e91a9e4db229a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -121,20 +121,36 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
     return emitIntrinsicCallOp(*this, expr, "x86.sse.sfence", voidTy);
   case X86::BI_mm_prefetch:
   case X86::BI__rdtsc:
-  case X86::BI__builtin_ia32_rdtscp:
+  case X86::BI__builtin_ia32_rdtscp: {
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
+  }
   case X86::BI__builtin_ia32_lzcnt_u16:
   case X86::BI__builtin_ia32_lzcnt_u32:
-  case X86::BI__builtin_ia32_lzcnt_u64:
+  case X86::BI__builtin_ia32_lzcnt_u64: {
+    mlir::Value isZeroPoison = builder.getFalse(getLoc(expr->getExprLoc()));
+    return emitIntrinsicCallOp(*this, expr, "ctlz", ops[0].getType(),
+                               mlir::ValueRange{ops[0], isZeroPoison});
+  }
   case X86::BI__builtin_ia32_tzcnt_u16:
   case X86::BI__builtin_ia32_tzcnt_u32:
-  case X86::BI__builtin_ia32_tzcnt_u64:
+  case X86::BI__builtin_ia32_tzcnt_u64: {
+    mlir::Value isZeroPoison = builder.getFalse(getLoc(expr->getExprLoc()));
+    return emitIntrinsicCallOp(*this, expr, "cttz", ops[0].getType(),
+                               mlir::ValueRange{ops[0], isZeroPoison});
+  }
   case X86::BI__builtin_ia32_undef128:
   case X86::BI__builtin_ia32_undef256:
   case X86::BI__builtin_ia32_undef512:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return {};
+    // The x86 definition of "undef" is not the same as the LLVM definition
+    // (PR32176). We leave optimizing away an unnecessary zero constant to the
+    // IR optimizer and backend.
+    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
+    //  value, we should use that here instead of a zero.
+    return builder.getNullValue(convertType(expr->getType()),
+                                getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_vec_ext_v4hi:
   case X86::BI__builtin_ia32_vec_ext_v16qi:
   case X86::BI__builtin_ia32_vec_ext_v8hi:
@@ -169,10 +185,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_vec_set_v16hi:
   case X86::BI__builtin_ia32_vec_set_v8si:
   case X86::BI__builtin_ia32_vec_set_v4di:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI_mm_setcsr:
-  case X86::BI__builtin_ia32_ldmxcsr:
+  case X86::BI__builtin_ia32_ldmxcsr: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    Address tmp = createMemTemp(expr->getArg(0)->getType(), loc);
+    builder.createStore(loc, ops[0], tmp);
+    return emitIntrinsicCallOp(*this, expr, "x86.sse.ldmxcsr",
+                               builder.getVoidTy(), tmp.getPointer());
+  }
   case X86::BI_mm_getcsr:
-  case X86::BI__builtin_ia32_stmxcsr:
+  case X86::BI__builtin_ia32_stmxcsr: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    Address tmp = createMemTemp(expr->getType(), loc);
+    emitIntrinsicCallOp(*this, expr, "x86.sse.stmxcsr", builder.getVoidTy(),
+                        tmp.getPointer());
+    return builder.createLoad(loc, tmp);
+  }
   case X86::BI__builtin_ia32_xsave:
   case X86::BI__builtin_ia32_xsave64:
   case X86::BI__builtin_ia32_xrstor:
@@ -681,10 +713,24 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_sqrtsh_round_mask:
   case X86::BI__builtin_ia32_sqrtsd_round_mask:
   case X86::BI__builtin_ia32_sqrtss_round_mask:
+    errorNYI("masked round sqrt builtins");
+    return {};
   case X86::BI__builtin_ia32_sqrtpd256:
   case X86::BI__builtin_ia32_sqrtpd:
   case X86::BI__builtin_ia32_sqrtps256:
-  case X86::BI__builtin_ia32_sqrtps:
+  case X86::BI__builtin_ia32_sqrtps: {
+    mlir::Location loc = getLoc(E->getExprLoc());
+    assert(E->getNumArgs() == 1 && "__builtin_ia32_sqrtps takes one argument");
+    mlir::Value arg = emitScalarExpr(E->getArg(0));
+    mlir::Type argTy = arg.getType();
+    if (auto vecTy = argTy.dyn_cast<mlir::VectorType>()) {
+      assert(vecTy.getNumElements() == 4 &&
+             vecTy.getElementType().isa<mlir::FloatType>() &&
+             "__builtin_ia32_sqrtps expects <4 x float> / __m128");
+    }
+    auto sqrt = cir::SqrtOp > ::create(builder, loc, argTy, arg);
+    return sqrt.getResult();
+  }
   case X86::BI__builtin_ia32_sqrtph256:
   case X86::BI__builtin_ia32_sqrtph:
   case X86::BI__builtin_ia32_sqrtph512:
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index d43a462a25092..937c66082ca40 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -30,6 +30,7 @@
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
 #include "mlir/Transforms/DialectConversion.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/CIR/Dialect/IR/CIRAttrs.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
@@ -44,6 +45,96 @@
 
 using namespace cir;
 using namespace llvm;
+using namespace mlir;
+
+static std::string getLLVMIntrinsicNameForType(Type llvmTy) {
+  std::string s;
+  {
+    llvm::raw_string_ostream os(s);
+    llvm::Type *unused = nullptr;
+    os << llvmTy;
+  }
+  if (auto vecTy = llvmTy.dyn_cast<LLVM::LLVMType>()) {
+  }
+  return s;
+}
+
+// Actual lowering
+LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
+    cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
+    ConversionPatternRewriter &rewriter) const {
+
+  Location loc = op.getLoc();
+  MLIRContext *ctx = rewriter.getContext();
+
+  Type cirResTy = op.getResult().getType();
+  Type llvmResTy = getTypeConverter()->convertType(cirResTy);
+  if (!llvmResTy)
+    return op.emitOpError(
+        "expected LLVM dialect result type for cir.sqrt lowering");
+
+  Value operand = adaptor.getInput();
+  Value llvmOperand = operand;
+  if (operand.getType() != llvmResTy) {
+    llvmOperand = rewriter.create<LLVM::BitcastOp>(loc, llvmResTy, operand);
+  }
+
+  // Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result
+  std::string intrinsicName = "llvm.sqrt.";
+  std::string suffix;
+
+  // If the CIR result type is a vector, include the 'vN' part in the suffix.
+  if (auto vec = cirResTy.dyn_cast<cir::VectorType>()) {
+    Type elt = vec.getElementType();
+    if (auto f = elt.dyn_cast<cir::FloatType>()) {
+      unsigned width = f.getWidth();
+      unsigned n = vec.getNumElements();
+      if (width == 32)
+        suffix = "v" + std::to_string(n) + "f32";
+      else if (width == 64)
+        suffix = "v" + std::to_string(n) + "f64";
+      else if (width == 16)
+        suffix = "v" + std::to_string(n) + "f16";
+      else
+        return op.emitOpError("unsupported float width for sqrt");
+    } else {
+      return op.emitOpError("vector element must be floating point for sqrt");
+    }
+  } else if (auto f = cirResTy.dyn_cast<cir::FloatType>()) {
+    // Scalar float
+    unsigned width = f.getWidth();
+    if (width == 32)
+      suffix = "f32";
+    else if (width == 64)
+      suffix = "f64";
+    else if (width == 16)
+      suffix = "f16";
+    else
+      return op.emitOpError("unsupported float width for sqrt");
+  } else {
+    return op.emitOpError("unsupported type for cir.sqrt lowering");
+  }
+
+  intrinsicName += suffix;
+
+  // Ensure the llvm intrinsic function exists at module scope. Insert it at
+  // the start of the module body using an insertion guard.
+  ModuleOp module = op->getParentOfType<ModuleOp>();
+  if (!module.lookupSymbol<LLVM::LLVMFuncOp>(intrinsicName)) {
+    OpBuilder::InsertionGuard guard(rewriter);
+    rewriter.setInsertionPointToStart(module.getBody());
+    auto llvmFnType = LLVM::LLVMType::getFunctionTy(llvmResTy, {llvmResTy},
+                                                    /*isVarArg=*/false);
+    rewriter.create<LLVM::LLVMFuncOp>(loc, intrinsicName, llvmFnType);
+  }
+
+  // Create the call and replace cir.sqrt
+  auto callee = SymbolRefAttr::get(ctx, intrinsicName);
+  rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, llvmResTy, callee,
+                                            ArrayRef<Value>{llvmOperand});
+
+  return mlir::success();
+}
 
 namespace cir {
 namespace direct {
@@ -284,7 +375,10 @@ void convertSideEffectForCall(mlir::Operation *callOp, bool isNothrow,
     memoryEffect = mlir::LLVM::MemoryEffectsAttr::get(
         callOp->getContext(), /*other=*/ModRefInfo::Ref,
         /*argMem=*/ModRefInfo::Ref,
-        /*inaccessibleMem=*/ModRefInfo::Ref);
+        /*inaccessibleMem=*/ModRefInfo::Ref,
+        /*errnoMem=*/ModRefInfo::Ref,
+        /*targetMem0=*/ModRefInfo::Ref,
+        /*targetMem1=*/ModRefInfo::Ref);
     noUnwind = true;
     willReturn = true;
     break;
@@ -293,7 +387,10 @@ void convertSideEffectForCall(mlir::Operation *callOp, bool isNothrow,
     memoryEffect = mlir::LLVM::MemoryEffectsAttr::get(
         callOp->getContext(), /*other=*/ModRefInfo::NoModRef,
         /*argMem=*/ModRefInfo::NoModRef,
-        /*inaccessibleMem=*/ModRefInfo::NoModRef);
+        /*inaccessibleMem=*/ModRefInfo::NoModRef,
+        /*errnoMem=*/ModRefInfo::NoModRef,
+        /*targetMem0=*/ModRefInfo::NoModRef,
+        /*targetMem1=*/ModRefInfo::NoModRef);
     noUnwind = true;
     willReturn = true;
     break;
@@ -670,6 +767,18 @@ mlir::LogicalResult CIRToLLVMASinOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+mlir::LogicalResult CIRToLLVMIsFPClassOpLowering::matchAndRewrite(
+    cir::IsFPClassOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Value src = adaptor.getSrc();
+  cir::FPClassTest flags = adaptor.getFlags();
+  mlir::IntegerType retTy = rewriter.getI1Type();
+
+  rewriter.replaceOpWithNewOp<mlir::LLVM::IsFPClass>(
+      op, retTy, src, static_cast<uint32_t>(flags));
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMAssumeOpLowering::matchAndRewrite(
     cir::AssumeOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -1995,7 +2104,6 @@ void CIRToLLVMGlobalOpLowering::setupRegionInitializedLLVMGlobalOp(
   //        attributes are available on cir.global ops. This duplicates code
   //        in CIRToLLVMGlobalOpLowering::matchAndRewrite() but that will go
   //        away when the placeholders are no longer needed.
-  assert(!cir::MissingFeatures::opGlobalConstant());
   const bool isConst = op.getConstant();
   assert(!cir::MissingFeatures::addressSpace());
   const unsigned addrSpace = 0;
@@ -2055,8 +2163,7 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite(
       convertTypeForMemory(*getTypeConverter(), dataLayout, cirSymType);
   // FIXME: These default values are placeholders until the the equivalent
   //        attributes are available on cir.global ops.
-  assert(!cir::MissingFeatures::opGlobalConstant());
-  const bool isConst = false;
+  const bool isConst = op.getConstant();
   assert(!cir::MissingFeatures::addressSpace());
   const unsigned addrSpace = 0;
   const bool isDsoLocal = op.getDsoLocal();
@@ -2570,6 +2677,120 @@ mlir::LogicalResult CIRToLLVMCmpOpLowering::matchAndRewrite(
   return cmpOp.emitError() << "unsupported type for CmpOp: " << type;
 }
 
+mlir::LogicalResult CIRToLLVMBinOpOverflowOpLowering::matchAndRewrite(
+    cir::BinOpOverflowOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Location loc = op.getLoc();
+  cir::BinOpOverflowKind arithKind = op.getKind();
+  cir::IntType operandTy = op.getLhs().getType();
+  cir::IntType resultTy = op.getResult().getType();
+
+  EncompassedTypeInfo encompassedTyInfo =
+      computeEncompassedTypeWidth(operandTy, resultTy);
+  mlir::IntegerType encompassedLLVMTy =
+      rewriter.getIntegerType(encompassedTyInfo.width);
+
+  mlir::Value lhs = adaptor.getLhs();
+  mlir::Value rhs = adaptor.getRhs();
+  if (operandTy.getWidth() < encompassedTyInfo.width) {
+    if (operandTy.isSigned()) {
+      lhs = mlir::LLVM::SExtOp::create(rewriter, loc, encompassedLLVMTy, lhs);
+      rhs = mlir::LLVM::SExtOp::create(rewriter, loc, encompassedLLVMTy, rhs);
+    } else {
+      lhs = mlir::LLVM::ZExtOp::create(rewriter, loc, encompassedLLVMTy, lhs);
+      rhs = mlir::LLVM::ZExtOp::create(rewriter, loc, encompassedLLVMTy, rhs);
+    }
+  }
+
+  std::string intrinName = getLLVMIntrinName(arithKind, encompassedTyInfo.sign,
+                                             encompassedTyInfo.width);
+  auto intrinNameAttr = mlir::StringAttr::get(op.getContext(), intrinName);
+
+  mlir::IntegerType overflowLLVMTy = rewriter.getI1Type();
+  auto intrinRetTy = mlir::LLVM::LLVMStructType::getLiteral(
+      rewriter.getContext(), {encompassedLLVMTy, overflowLLVMTy});
+
+  auto callLLVMIntrinOp = mlir::LLVM::CallIntrinsicOp::create(
+      rewriter, loc, intrinRetTy, intrinNameAttr, mlir::ValueRange{lhs, rhs});
+  mlir::Value intrinRet = callLLVMIntrinOp.getResult(0);
+
+  mlir::Value result = mlir::LLVM::ExtractValueOp::create(
+                           rewriter, loc, intrinRet, ArrayRef<int64_t>{0})
+                           .getResult();
+  mlir::Value overflow = mlir::LLVM::ExtractValueOp::create(
+                             rewriter, loc, intrinRet, ArrayRef<int64_t>{1})
+                             .getResult();
+
+  if (resultTy.getWidth() < encompassedTyInfo.width) {
+    mlir::Type resultLLVMTy = getTypeConverter()->convertType(resultTy);
+    auto truncResult =
+        mlir::LLVM::TruncOp::create(rewriter, loc, resultLLVMTy, result);
+
+    // Extend the truncated result back to the encompassing type to check for
+    // any overflows during the truncation.
+    mlir::Value truncResultExt;
+    if (resultTy.isSigned())
+      truncResultExt = mlir::LLVM::SExtOp::create(
+          rewriter, loc, encompassedLLVMTy, truncResult);
+    else
+      truncResultExt = mlir::LLVM::ZExtOp::create(
+          rewriter, loc, encompassedLLVMTy, truncResult);
+    auto truncOverflow = mlir::LLVM::ICmpOp::create(
+        rewriter, loc, mlir::LLVM::ICmpPredicate::ne, truncResultExt, result);
+
+    result = truncResult;
+    overflow = mlir::LLVM::OrOp::create(rewriter, loc, overflow, truncOverflow);
+  }
+
+  mlir::Type boolLLVMTy =
+      getTypeConverter()->convertType(op.getOverflow().getType());
+  if (boolLLVMTy != rewriter.getI1Type())
+    overflow = mlir::LLVM::ZExtOp::create(rewriter, loc, boolLLVMTy, overflow);
+
+  rewriter.replaceOp(op, mlir::ValueRange{result, overflow});
+
+  return mlir::success();
+}
+
+std::string CIRToLLVMBinOpOverflowOpLowering::getLLVMIntrinName(
+    cir::BinOpOverflowKind opKind, bool isSigned, unsigned width) {
+  // The intrinsic name is `@llvm.{s|u}{opKind}.with.overflow.i{width}`
+
+  std::string name = "llvm.";
+
+  if (isSigned)
+    name.push_back('s');
+  else
+    name.push_back('u');
+
+  switch (opKind) {
+  case cir::BinOpOverflowKind::Add:
+    name.append("add.");
+    break;
+  case cir::BinOpOverflowKind::Sub:
+    name.append("sub.");
+    break;
+  case cir::BinOpOverflowKind::Mul:
+    name.append("mul.");
+    break;
+  }
+
+  name.append("with.overflow.i");
+  name.append(std::to_string(width));
+
+  return name;
+}
+
+CIRToLLVMBinOpOverflowOpLowering::EncompassedTypeInfo
+CIRToLLVMBinOpOverflowOpLowering::computeEncompassedTypeWidth(
+    cir::IntType operandTy, cir::IntType resultTy) {
+  bool sign = operandTy.getIsSigned() || resultTy.getIsSigned();
+  unsigned width =
+      std::max(operandTy.getWidth() + (sign && operandTy.isUnsigned()),
+               resultTy.getWidth() + (sign && resultTy.isUnsigned()));
+  return {sign, width};
+}
+
 mlir::LogicalResult CIRToLLVMShiftOpLowering::matchAndRewrite(
     cir::ShiftOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -3100,6 +3321,90 @@ mlir::LogicalResult CIRToLLVMAllocExceptionOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+static mlir::LLVM::LLVMStructType
+getLLVMLandingPadStructTy(mlir::ConversionPatternRewriter &rewriter) {
+  // Create the landing pad type: struct { ptr, i32 }
+  mlir::MLIRContext *ctx = rewriter.getContext();
+  auto llvmPtr = mlir::LLVM::LLVMPointerType::get(ctx);
+  llvm::SmallVector<mlir::Type> structFields = {llvmPtr, rewriter.getI32Type()};
+  return mlir::LLVM::LLVMStructType::getLiteral(ctx, structFields);
+}
+
+mlir::LogicalResult CIRToLLVMEhInflightOpLowering::matchAndRewrite(
+    cir::EhInflightOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  auto llvmFn = op->getParentOfType<mlir::LLVM::LLVMFuncOp>();
+  assert(llvmFn && "expected LLVM function parent");
+  mlir::Block *entryBlock = &llvmFn.getRegion().front();
+  assert(entryBlock->isEntryBlock());
+
+  mlir::ArrayAttr catchListAttr = op.getCatchTypeListAttr();
+  mlir::SmallVector<mlir::Value> catchSymAddrs;
+
+  auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext());
+  mlir::Location loc = op.getLoc();
+
+  // %landingpad = landingpad { ptr, i32 }
+  // Note that since llvm.landingpad has to be the first operation on the
+  // block, any needed value for its operands has to be added somewhere else.
+  if (catchListAttr) {
+    //   catch ptr @_ZTIi
+    //   catch ptr @_ZTIPKc
+    for (mlir::Attribute catchAttr : catchListAttr) {
+      auto symAttr = cast<mlir::FlatSymbolRefAttr>(catchAttr);
+      // Generate `llvm.mlir.addressof` for each symbol, and place those
+      // operations in the LLVM function entry basic block.
+      mlir::OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.setInsertionPointToStart(entryBlock);
+      mlir::Value addrOp = mlir::LLVM::AddressOfOp::create(
+          rewriter, loc, llvmPtrTy, symAttr.getValue());
+      catchSymAddrs.push_back(addrOp);
+    }
+  } else if (!op.getCleanup()) {
+    // Emit the catch-all clause only when cleanup is not set: once a
+    // catch-all handler is present, no exception can unwind past that
+    // handler.
+    mlir::OpBuilder::InsertionGuard guard(rewriter);
+    rewriter.setInsertionPointToStart(entryBlock);
+    mlir::Value nullOp = mlir::LLVM::ZeroOp::create(rewriter, loc, llvmPtrTy);
+    catchSymAddrs.push_back(nullOp);
+  }
+
+  // %slot = extractvalue { ptr, i32 } %x, 0
+  // %selector = extractvalue { ptr, i32 } %x, 1
+  mlir::LLVM::LLVMStructType llvmLandingPadStructTy =
+      getLLVMLandingPadStructTy(rewriter);
+  auto landingPadOp = mlir::LLVM::LandingpadOp::create(
+      rewriter, loc, llvmLandingPadStructTy, catchSymAddrs);
+
+  if (op.getCleanup())
+    landingPadOp.setCleanup(true);
+
+  mlir::Value slot =
+      mlir::LLVM::ExtractValueOp::create(rewriter, loc, landingPadOp, 0);
+  mlir::Value selector =
+      mlir::LLVM::ExtractValueOp::create(rewriter, loc, landingPadOp, 1);
+  rewriter.replaceOp(op, mlir::ValueRange{slot, selector});
+
+  // Landing pads are required to be in LLVM functions with a personality
+  // attribute.
+  // TODO(cir): personality creation is hardcoded for now so that exception
+  // tests can be added; once CIR is annotated with this information, move
+  // it into the FuncOp lowering instead.
+  mlir::OpBuilder::InsertionGuard guard(rewriter);
+  // Insert personality decl before the current function.
+  rewriter.setInsertionPoint(llvmFn);
+  auto personalityFnTy =
+      mlir::LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {},
+                                        /*isVarArg=*/true);
+
+  const StringRef fnName = "__gxx_personality_v0";
+  createLLVMFuncOpIfNotExist(rewriter, op, fnName, personalityFnTy);
+  llvmFn.setPersonality(fnName);
+
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMTrapOpLowering::matchAndRewrite(
     cir::TrapOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -3843,6 +4148,12 @@ mlir::LogicalResult CIRToLLVMBlockAddressOpLowering::matchAndRewrite(
   return mlir::failure();
 }
 
+mlir::LogicalResult CIRToLLVMAwaitOpLowering::matchAndRewrite(
+    cir::AwaitOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  return mlir::failure();
+}
+
 std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass() {
   return std::make_unique<ConvertCIRToLLVMPass>();
 }
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 0591de545b81d..be6a380372efe 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -12,11 +12,25 @@
 #ifndef CLANG_CIR_LOWERTOLLVM_H
 #define CLANG_CIR_LOWERTOLLVM_H
 
+#include "mlir/Conversion/PatternRewriter.h"
 #include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 
+namespace cir {
+class SqrtOp;
+}
+
+class CIRToLLVMSqrtOpLowering : public mlir::OpConversionPattern<cir::SqrtOp> {
+public:
+  using mlir::OpConversionPattern<cir::SqrtOp>::OpConversionPattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override;
+};
+
 namespace cir {
 
 namespace direct {
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
new file mode 100644
index 0000000000000..6e1dace82928c
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
@@ -0,0 +1,46 @@
+// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtss, sqrtsd, etc.)
+// RUN: %clang_cc1 -fcir -triple x86_64-unknown-linux-gnu -O0 %s -emit-cir -o - | FileCheck %s
+
+#include <immintrin.h>
+
+// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
+__m128 test_sqrtps(__m128 x) {
+  return __builtin_ia32_sqrtps(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
+__m256 test_sqrtps256(__m256 x) {
+  return __builtin_ia32_sqrtps256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
+__m512 test_sqrtps512(__m512 x) {
+  return __builtin_ia32_sqrtps512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps512
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
+__m128d test_sqrtpd(__m128d x) {
+  return __builtin_ia32_sqrtpd(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
+__m256d test_sqrtpd256(__m256d x) {
+  return __builtin_ia32_sqrtpd256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
+__m512d test_sqrtpd512(__m512d x) {
+  return __builtin_ia32_sqrtpd512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd512
+// CHECK: cir.sqrt

>From 4a39fd7185cd294b96a4faadc2fa21f2a4d53b6b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Sat, 29 Nov 2025 09:59:40 +0530
Subject: [PATCH 02/27] Implement sqrt builtins for all vector sizes

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 20 ++----
 .../test/CIR/CodeGen/X86/cir-sqrt-builtins.c  | 67 +++++++++++++++++++
 2 files changed, 73 insertions(+), 14 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 45c0de322925a..f8a139ec7a8e0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -786,24 +786,16 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_sqrtpd256:
   case X86::BI__builtin_ia32_sqrtpd:
   case X86::BI__builtin_ia32_sqrtps256:
-  case X86::BI__builtin_ia32_sqrtps: {
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    assert(expr->getNumArgs() == 1 && "__builtin_ia32_sqrtps takes one argument");
-    mlir::Value arg = emitScalarExpr(expr->getArg(0));
-    mlir::Type argTy = arg.getType();
-    if (auto vecTy = argTy.dyn_cast<mlir::VectorType>()) {
-      assert(vecTy.getNumElements() == 4 &&
-             vecTy.getElementType().isa<mlir::FloatType>() &&
-             "__builtin_ia32_sqrtps expects <4 x float> / __m128");
-    }
-    auto sqrt = cir::SqrtOp::create(builder, loc, argTy, arg);
-    return sqrt.getResult();
-  }
+  case X86::BI__builtin_ia32_sqrtps:
   case X86::BI__builtin_ia32_sqrtph256:
   case X86::BI__builtin_ia32_sqrtph:
   case X86::BI__builtin_ia32_sqrtph512:
   case X86::BI__builtin_ia32_sqrtps512:
-  case X86::BI__builtin_ia32_sqrtpd512:
+  case X86::BI__builtin_ia32_sqrtpd512: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    mlir::Value arg = ops[0];
+    return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
+  }
   case X86::BI__builtin_ia32_pmuludq128:
   case X86::BI__builtin_ia32_pmuludq256:
   case X86::BI__builtin_ia32_pmuludq512:
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
new file mode 100644
index 0000000000000..ef5cb954e3efe
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -0,0 +1,67 @@
+// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtph, etc.)
+// RUN: %clang_cc1 -fclangir -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -emit-cir %s -o - | FileCheck %s
+
+#include <immintrin.h>
+
+// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
+__m128 test_sqrtps(__m128 x) {
+  return __builtin_ia32_sqrtps(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
+__m256 test_sqrtps256(__m256 x) {
+  return __builtin_ia32_sqrtps256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
+__m512 test_sqrtps512(__m512 x) {
+  return __builtin_ia32_sqrtps512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtps512
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
+__m128d test_sqrtpd(__m128d x) {
+  return __builtin_ia32_sqrtpd(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
+__m256d test_sqrtpd256(__m256d x) {
+  return __builtin_ia32_sqrtpd256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
+__m512d test_sqrtpd512(__m512d x) {
+  return __builtin_ia32_sqrtpd512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtpd512
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtph - half precision vector sqrt (128-bit)
+__m128h test_sqrtph(__m128h x) {
+  return __builtin_ia32_sqrtph(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtph
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtph256 - half precision vector sqrt (256-bit)
+__m256h test_sqrtph256(__m256h x) {
+  return __builtin_ia32_sqrtph256(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtph256
+// CHECK: cir.sqrt
+
+// Test __builtin_ia32_sqrtph512 - half precision vector sqrt (512-bit)
+__m512h test_sqrtph512(__m512h x) {
+  return __builtin_ia32_sqrtph512(x);
+}
+// CHECK-LABEL: cir.func @test_sqrtph512
+// CHECK: cir.sqrt
\ No newline at end of file

>From ef3fd9711494e864190932566bcfe46231b95c51 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Sun, 30 Nov 2025 11:44:23 +0530
Subject: [PATCH 03/27] Test file renamed

---
 .../CIR/CodeGen/X86/cir-sqrtps-builtins.c     | 46 -------------------
 1 file changed, 46 deletions(-)
 delete mode 100644 clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c

diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
deleted file mode 100644
index 6e1dace82928c..0000000000000
--- a/clang/test/CIR/CodeGen/X86/cir-sqrtps-builtins.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtss, sqrtsd, etc.)
-// RUN: %clang_cc1 -fcir -triple x86_64-unknown-linux-gnu -O0 %s -emit-cir -o - | FileCheck %s
-
-#include <immintrin.h>
-
-// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
-__m128 test_sqrtps(__m128 x) {
-  return __builtin_ia32_sqrtps(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
-__m256 test_sqrtps256(__m256 x) {
-  return __builtin_ia32_sqrtps256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
-__m512 test_sqrtps512(__m512 x) {
-  return __builtin_ia32_sqrtps512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps512
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
-__m128d test_sqrtpd(__m128d x) {
-  return __builtin_ia32_sqrtpd(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
-__m256d test_sqrtpd256(__m256d x) {
-  return __builtin_ia32_sqrtpd256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
-__m512d test_sqrtpd512(__m512d x) {
-  return __builtin_ia32_sqrtpd512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd512
-// CHECK: cir.sqrt

>From 97056731fce0a5e5c2185e16986e0189cec95c7b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Tue, 2 Dec 2025 21:29:57 +0530
Subject: [PATCH 04/27] Add sqrt changes patch

---
 my-sqrt-changes.patch | Bin 0 -> 12058 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 my-sqrt-changes.patch

diff --git a/my-sqrt-changes.patch b/my-sqrt-changes.patch
new file mode 100644
index 0000000000000000000000000000000000000000..87c0ca69ac8abe6aaa684ffbbce3c65e342f6066
GIT binary patch
literal 12058
zcmdU#Z%-V_5yo4}yGZ#CbNCYBz=91N*n~S7WA4rpIa`=G7o8$*fn7{u*uCAw4m$Z$
z<U{2H<$aRp*R at 6WjAxexmdK}(%}h^sRdsd!d8%gr`=6t5656_o(9m_Pr-AO9VKE$r
zxB7dgu|<uZ#pm;I6k3&cTj6=w)z~x5YHCeOGY5LMQG5F$bVENZgg2snP+31c+P?|E
zj`AVHK>v=!tu3i|F2ZT(#J5Fw7#`?a4J*1>g9PmBADXqIw at 2E?gZTD9vmS+IJum2D
zRow**wAax3yVD$8=HgXIgUI_%f6v0X_V+gK<4JfT2^!kLd{_!=x}TH0^ZHll&67B%
zp*4%)%XlI0$LEtq3vtQMw2%h7;@i_M2YS;H_kQGPyPx#`-$zYt4foOrJex$wX4)*@
z72&0L_M&ZeqO}fWOPuPCuGis at x_hbjo$xn}Vt?}*IUk;DPxn55Q#)vChOa)+eJkz*
zYVV{s&bbKhwLjM69JXa4%<k%q?|vbZw+OHF7CLV<V-RhAJ?fhidM689kc7SHQ9JUF
zt*{-w)0%CK4 at 4W!YsVRd=9eYIM(y2cru{1Sm-52zrOlnv>V|k>FK6O_x1Z>~7uLls
z*GA)V?|;_(z3^?e_l8zf_g;5SZda39!rjli%C7cwRwvPlQ=~vw7X5m<!@}Kq>g}Ji
zNpsAjs=kw-RMOn1uP>AJUbGvFlH`glS=gp*_nWXkj3E26yyqI*ku`70diM3Sd-?W-
zyz`Zwz8!kYy=aeU{7f3YJ-sAlo~Md1BcH>Qdh$ZcM^6<M$OxQqZ<v#2E~P>qPTum|
zxb=9_Sx9;9<}~)gMU1MJAFo3P?$5Lj(<AZ9q7r#_QaZ#0G|`Sx5o<?N#2Q9jHaIa_
zo}2R-eF8a-m;@_qipyZ4v$-y1-%VN5(~~?|!$P!<!^=}}9>icnT;%Y at TU-AB7rhN6
z``zI;sG}$R3hxAsHHBp!%Oh80kB`F_dIGDql`j{Rtsd+7K-qd(Z(2fXOS)gtsw%#-
zEOU3*%q4epc2@}3`8dv7)=nR3 at AKL_ST8*<OE-r3$U{U3M|#BNvltVJyqog((-_5y
zhzHC+_v6ThthX88&qOr!gW@@P at jUwGiSSHIxT_U$bx$#DJD!Z8uYUMz_)0u~jwf2^
z4nzYg2OB|kOLY2r?r1$y^dqGM&AJF*M+{^*rw}saME@|P^kMiHOPPy4^r`1ilM-zB
zlQcJoy8R_dZ?aacDBn%J?evHHb?bQ<EvHqojJc9!^flvD7fAm?Z#elGS;Je|gH;?<
zqYdSvPK***6I{;Y3UCA!Ku0>B8QRNSw3ad#cG8y}_e3KhdE at rO&|+-`Rtr9a@{d{p
zj#!8sImaJ0+7SivWU7Y>&BLa<l8S7ZqsLbCfm4l=NgJ`o!0t!w0{VC^nH at SrMx$$J
zGT&V;Ynn&ZYko%DhEpq2V^357cDAhb>z}{+SNOM{n)=tiLi2~3MUTMhD?>5VUDP<^
zDHA)W(wIrD^+tE5iL#DB)kc at yql0lWjn|4{d8%dXCeckAvWP=%n9zK#%e02<L=`y@
zwF}vV?Y6vUs>Nos1R`I*R@;UFutwKpnFgow+o&Uap=yD=BTG`reRlb_EgPY?Q704h
zjAwoVbv)l`<|v-Fw#qlBxKWX3S-H80vaTpEBz_+h4J at +fwrmpSnx_v1_UJ^fG9O51
zh;DPsFbkxbPgd5 at +O~Wy*-w(fGZU74EDr9W*c&>Sjh at Go=f_RSL!R=;^g=6NM{X~~
zA&ooYTo0dT-h2OY&zy&I&+QXlT*lpYeOvxH{Ukx17Ou+1A4kc2cf`u|7#-bTW-GfV
zx){c9=v3)*3`W?5WqY#?v)oTT+q-}LNfv^}t=H64^yQqX5YYH9V=t3c7u44vUa#%8
z<Z0yCZ}ji%a@?N0`nKx%d!g4=MbATNr7N9Oar3k&qj91MQV}nyDGp*au_SM$3%nX5
zCRIf%RuxVC-_rPz<}JlaWAdtEno^IdD5lB7tBEz~Wm!o7p)RV4^t`NRo8{e<b$R&f
zn6u8MCy$GE#$PLf(naxZ1aqkpKrwdWWh)?0cI7Lxaa~{Yj<w#p_XirITVwX}T-}cO
zq5lva$(n{5j4qs>s=nFRRbII-DoyQfNpBpN_Lb-N#h2)cZR|@%=>1((%txoe1G;z3
zqd%5M!m?mKWa+7M_ZR7DRyf)BZ2C@;8YZ=lds&x#txEaB@)XFM>W<owUg*!^!)3*G
zdo6Z#I!p4`)hV6~G^cmYeBGM9gobOZhUTk`T}^2j<P37?$OjB197p}{#F1Im;tk;}
z8*~5iclT>fFZXMkmsd2m<>Qi*_YGTMS7j-`)2`?QfWZ2CcR%h6{6jSQoi1v@$vm6W
z039~v%T|H$rHVLJMR>`QlZ?48@)&1pP1_#NUd0$>HIVDMyf53)tU6Ok at AbSKE4q|C
z8#m*;Fg#D6hmH(8rwG0zADJJ1`CF4kJ{mrKiqSi1A-^-s&K`Zg+YvcACeL?Im$k1m
z-H_#w5vgd`YtZ*sLwmxCQe at A$ru*yJ7<hsX0ND<k+Yn~3Iw#c`^ug%#V!NlZW=^Hf
z?hNbJ`MGB-vj&gGpc(oRbTYu1bQ?DG<a&6O^_$Ruc`^05*%O*fPi{*oTckKs=3mdD
zi2XkFpUX3QnTJhU_)R89I+1c_aar6tcC~BzMqq^G5w_ZUg*@F3D|Cew^7wu;Wc)0m
z5b6Nz#lJ`yZZ5x at 921w<O+vGZhL;sHs8X<@mUj72_YakALBp#$zqamIgq;p`w<cV?
zqOqH!;hV`jDGt6Fb&sEXPgs#p;GrO3f2O-`bLd!GW<1nb9(Bl?{CdVZIk{D0d-U55
zm5ms8ltd5VNYSc{mS;83Jvg6D<w#$nEBR6k>t*!H^57GPy6_Imr*$3l#x?ffQsr?4
ze<xzoOKmHjSl-SNONt^?V8ompuRPb7b#qwhKAp#SliH$UqGcVCDz#keSJ6X<Mit4u
z$72*fi+z{G<9^h5ip*uLI~L%CwxtO}*kJ3r4i;-YUgaIEyi=R{kSR|zG8wEtwmCds
zdf1@#)I;=oY*F85_?^kF-lu$oRL2?_=q$)xd<`9~jqByu at tVs0Wt4qhwNHzd<=!5C
z!5OAK*6%6VPeK`Hjc%I@^2iJ6XFqBaZ_hKTb@;4n?KW!I0<@Z2JsHRCD)Dmro))61
zY8$R)+nh|}taJ2{ETK$kl$EFXP~n=1GVoP~h=F~9IP%jm3#@*}$?ey^3z+6i-yipH
z>!^(SdXW23d8f!<j-2BoxktfW?aU)No<A-UnqJIn^L1#3T~jgJ*BSaCXw1G4*%u#+
z4Aukb3g=MA97XH0-)Q(;A^`b;_24Y4{_u0MkL?plOpf>B)YR;5j8epdb|n*WYGf#4
z3t7*40d4)|`<By1UW*?t)A(PKRy$W{`@Lw8L5NHCJ5s2Ek at ob~f8n>NJD;16PmYz+
z`rGnCo6txd#WhuU52V9YjU5UxeG!oq80xXG)0)QUF+LEoSkiqn)(MmTPT}r!#y%#+
zR at 2p<V=jKd;1#RgJK8dfy%+U7>Ni>Rn$Ly^gw7#P*oplnl(&3Owo#AR`5OB at Q~DiK
z5ytbY;>%~^w5JS<)!N<7X*m*f4DZiGm+~)`t|y~w!+!r}|NB^#_I3B~bjnoATDrML
i>Z$uV<~q4bZj<k6%t_u&-c)?ycV>LO at Ne}=*8c%%jt^@9

literal 0
HcmV?d00001


>From 21119e5ae7529285662c0e9dc6c0024e07a5899b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:19:33 +0530
Subject: [PATCH 05/27] group with other floating point ops

---
 clang/include/clang/CIR/Dialect/IR/CIROps.td  | 41 ++--------
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  4 +-
 .../test/CIR/CodeGen/X86/cir-sqrt-builtins.c  | 80 +++++--------------
 3 files changed, 31 insertions(+), 94 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 2dc71c68f8a94..dc9e3c6a486d6 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -3275,39 +3275,6 @@ def CIR_InlineAsmOp : CIR_Op<"asm", [RecursiveMemoryEffects]> {
   let hasCustomAssemblyFormat = 1;
 }
 
-//===----------------------------------------------------------------------===//
-// SqrtOp
-//===----------------------------------------------------------------------===//
-
-def CIR_SqrtOp : CIR_Op<"sqrt", [Pure]> {
-  let summary = "Floating-point square root";
-
-  let description = [{
-    The `cir.sqrt` operation computes the element-wise square root of its input.
-
-    The input must be either:
-      • a floating-point scalar type, or
-      • a vector whose element type is floating-point.
-
-    The result type must match the input type exactly.
-
-    Examples:
-      // scalar
-      %r = cir.sqrt %x : !cir.fp64
-
-      // vector
-      %v = cir.sqrt %vec : !cir.vector<!cir.fp32 x 4>
-  }];
-
-  // input and output types: float or vector-of-float
-  let arguments = (ins CIR_AnyFloatOrVecOfFloatType:$input);
-  let results   = (outs CIR_AnyFloatOrVecOfFloatType:$result);
-
-  let assemblyFormat = [{
-    $input `:` type($input) attr-dict
-  }];
-}
-
 //===----------------------------------------------------------------------===//
 // UnreachableOp
 //===----------------------------------------------------------------------===//
@@ -4664,6 +4631,14 @@ def CIR_PtrDiffOp : CIR_Op<"ptr_diff", [Pure, SameTypeOperands]> {
 // Floating Point Ops
 //===----------------------------------------------------------------------===//
 
+def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
+  let summary = "Floating-point square root operation";
+  
+  let description = [{
+    Computes the square root of a floating-point value or vector.
+  }];
+}
+
 class CIR_UnaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
     : CIR_Op<mnemonic, [Pure, SameOperandsAndResultType]>
 {
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index f8a139ec7a8e0..35ba0f48ce6d8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -781,14 +781,14 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_sqrtsh_round_mask:
   case X86::BI__builtin_ia32_sqrtsd_round_mask:
   case X86::BI__builtin_ia32_sqrtss_round_mask:
-    errorNYI("masked round sqrt builtins");
-    return {};
   case X86::BI__builtin_ia32_sqrtpd256:
   case X86::BI__builtin_ia32_sqrtpd:
   case X86::BI__builtin_ia32_sqrtps256:
   case X86::BI__builtin_ia32_sqrtps:
   case X86::BI__builtin_ia32_sqrtph256:
   case X86::BI__builtin_ia32_sqrtph:
+    errorNYI("Unimplemented builtin");
+    return {};
   case X86::BI__builtin_ia32_sqrtph512:
   case X86::BI__builtin_ia32_sqrtps512:
   case X86::BI__builtin_ia32_sqrtpd512: {
diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
index ef5cb954e3efe..97993cabf0ebf 100644
--- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -1,67 +1,29 @@
-// Test for x86 sqrt builtins (sqrtps, sqrtpd, sqrtph, etc.)
-// RUN: %clang_cc1 -fclangir -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -emit-cir %s -o - | FileCheck %s
-
 #include <immintrin.h>
+// Test X86-specific sqrt builtins
 
-// Test __builtin_ia32_sqrtps - single precision vector sqrt (128-bit)
-__m128 test_sqrtps(__m128 x) {
-  return __builtin_ia32_sqrtps(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps256 - single precision vector sqrt (256-bit)
-__m256 test_sqrtps256(__m256 x) {
-  return __builtin_ia32_sqrtps256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtps512 - single precision vector sqrt (512-bit)
-__m512 test_sqrtps512(__m512 x) {
-  return __builtin_ia32_sqrtps512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtps512
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd - double precision vector sqrt (128-bit)
-__m128d test_sqrtpd(__m128d x) {
-  return __builtin_ia32_sqrtpd(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd256 - double precision vector sqrt (256-bit)
-__m256d test_sqrtpd256(__m256d x) {
-  return __builtin_ia32_sqrtpd256(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd256
-// CHECK: cir.sqrt
-
-// Test __builtin_ia32_sqrtpd512 - double precision vector sqrt (512-bit)
-__m512d test_sqrtpd512(__m512d x) {
-  return __builtin_ia32_sqrtpd512(x);
-}
-// CHECK-LABEL: cir.func @test_sqrtpd512
-// CHECK: cir.sqrt
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
 
-// Test __builtin_ia32_sqrtph - half precision vector sqrt (128-bit)
-__m128h test_sqrtph(__m128h x) {
-  return __builtin_ia32_sqrtph(x);
+// Test __builtin_ia32_sqrtph512
+__m512h test_sqrtph512(__m512h a) {
+  return __builtin_ia32_sqrtph512(a);
 }
-// CHECK-LABEL: cir.func @test_sqrtph
-// CHECK: cir.sqrt
+// CHECK: cir.func @test_sqrtph512
+// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.fp16 x 32>
+// CHECK: cir.return [[RES]]
 
-// Test __builtin_ia32_sqrtph256 - half precision vector sqrt (256-bit)
-__m256h test_sqrtph256(__m256h x) {
-  return __builtin_ia32_sqrtph256(x);
+// Test __builtin_ia32_sqrtps512
+__m512 test_sqrtps512(__m512 a) {
+  return __builtin_ia32_sqrtps512(a);
 }
-// CHECK-LABEL: cir.func @test_sqrtph256
-// CHECK: cir.sqrt
+// CHECK: cir.func @test_sqrtps512
+// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.float x 16>
+// CHECK: cir.return [[RES]]
 
-// Test __builtin_ia32_sqrtph512 - half precision vector sqrt (512-bit)
-__m512h test_sqrtph512(__m512h x) {
-  return __builtin_ia32_sqrtph512(x);
+// Test __builtin_ia32_sqrtpd512
+__m512d test_sqrtpd512(__m512d a) {
+  return __builtin_ia32_sqrtpd512(a);
 }
-// CHECK-LABEL: cir.func @test_sqrtph512
-// CHECK: cir.sqrt
\ No newline at end of file
+// CHECK: cir.func @test_sqrtpd512
+// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.double x 8>
+// CHECK: cir.return [[RES]]
\ No newline at end of file

>From 90878ec8d8d6b5b46286c419c4187f01215b6e4b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:25:20 +0530
Subject: [PATCH 06/27] place the implementation with other floating point ops

---
 clang/include/clang/CIR/Dialect/IR/CIROps.td | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index dc9e3c6a486d6..fa10848f4397a 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4631,14 +4631,6 @@ def CIR_PtrDiffOp : CIR_Op<"ptr_diff", [Pure, SameTypeOperands]> {
 // Floating Point Ops
 //===----------------------------------------------------------------------===//
 
-def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
-  let summary = "Floating-point square root operation";
-  
-  let description = [{
-    Computes the square root of a floating-point value or vector.
-  }];
-}
-
 class CIR_UnaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
     : CIR_Op<mnemonic, [Pure, SameOperandsAndResultType]>
 {
@@ -4650,6 +4642,14 @@ class CIR_UnaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
   let llvmOp = llvmOpName;
 }
 
+def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
+  let summary = "Floating-point square root operation";
+  
+  let description = [{
+    Computes the square root of a floating-point value or vector.
+  }];
+}
+
 def CIR_ACosOp : CIR_UnaryFPToFPBuiltinOp<"acos", "ACosOp"> {
   let summary = "Computes the arcus cosine of the specified value";
   let description = [{

>From 3529f40b48025f543a2b3ced9d6aa63a2241283f Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:30:58 +0530
Subject: [PATCH 07/27] Update
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp  | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5514a4cd0876d..709e3026e51f1 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -45,30 +45,30 @@
 
 using namespace cir;
 using namespace llvm;
-using namespace mlir;
 
-static std::string getLLVMIntrinsicNameForType(Type llvmTy) {
+
+static std::string getLLVMIntrinsicNameForType(mlir::Type llvmTy) {
   std::string s;
   {
     llvm::raw_string_ostream os(s);
     llvm::Type *unused = nullptr;
     os << llvmTy;
   }
-  if (auto vecTy = llvmTy.dyn_cast<LLVM::LLVMType>()) {
+  if (auto vecTy = llvmTy.dyn_cast<mlir::LLVM::LLVMType>()) {
   }
   return s;
 }
 
 // Actual lowering
-LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
+mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
     cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
-    ConversionPatternRewriter &rewriter) const {
+    mlir::ConversionPatternRewriter &rewriter) const {
 
-  Location loc = op.getLoc();
-  MLIRContext *ctx = rewriter.getContext();
+  mlir::Location loc = op.getLoc();
+  mlir::MLIRContext *ctx = rewriter.getContext();
 
-  Type cirResTy = op.getResult().getType();
-  Type llvmResTy = getTypeConverter()->convertType(cirResTy);
+  mlir::Type cirResTy = op.getResult().getType();
+  mlir::Type llvmResTy = getTypeConverter()->convertType(cirResTy);
   if (!llvmResTy)
     return op.emitOpError(
         "expected LLVM dialect result type for cir.sqrt lowering");

>From 92d0ac3ed203e38e244c0afabb5f3524d1772645 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:44:10 +0530
Subject: [PATCH 08/27] Update clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 35ba0f48ce6d8..eb9ac260f225d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -794,7 +794,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_sqrtpd512: {
     mlir::Location loc = getLoc(expr->getExprLoc());
     mlir::Value arg = ops[0];
-    return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
+    return builder.create<cir::SqrtOp>(loc, arg.getType(), arg).getResult();
   }
   case X86::BI__builtin_ia32_pmuludq128:
   case X86::BI__builtin_ia32_pmuludq256:

>From 0385662da0847396b4096ddad2c90bcf1c593d0f Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:45:45 +0530
Subject: [PATCH 09/27] Update
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 709e3026e51f1..a80103764a60a 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -51,7 +51,6 @@ static std::string getLLVMIntrinsicNameForType(mlir::Type llvmTy) {
   std::string s;
   {
     llvm::raw_string_ostream os(s);
-    llvm::Type *unused = nullptr;
     os << llvmTy;
   }
   if (auto vecTy = llvmTy.dyn_cast<mlir::LLVM::LLVMType>()) {

>From ddcb7b8476e796d9945bbde26a39e567853da34e Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:51:05 +0530
Subject: [PATCH 10/27] update
 clang\lib\CIR\Lowering\DirectToLLVM\LowerToLLVM.cpp

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5514a4cd0876d..c17980f7ffbf7 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -54,8 +54,6 @@ static std::string getLLVMIntrinsicNameForType(Type llvmTy) {
     llvm::Type *unused = nullptr;
     os << llvmTy;
   }
-  if (auto vecTy = llvmTy.dyn_cast<LLVM::LLVMType>()) {
-  }
   return s;
 }
 

>From 233efad67b9677f6e77034e6868905070708765a Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:56:02 +0530
Subject: [PATCH 11/27] Update
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h

Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index be6a380372efe..1f69b7d66f25e 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -12,7 +12,6 @@
 #ifndef CLANG_CIR_LOWERTOLLVM_H
 #define CLANG_CIR_LOWERTOLLVM_H
 
-#include "mlir/Conversion/PatternRewriter.h"
 #include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Transforms/DialectConversion.h"

>From 9d940bc80e60470e6f5dcc82d74e45dd361acdc2 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 3 Dec 2025 19:57:44 +0530
Subject: [PATCH 12/27] Update
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index ebb41e42a2871..5dfef939126d0 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -120,8 +120,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
   if (!module.lookupSymbol<LLVM::LLVMFuncOp>(intrinsicName)) {
     OpBuilder::InsertionGuard guard(rewriter);
     rewriter.setInsertionPointToStart(module.getBody());
-    auto llvmFnType = LLVM::LLVMType::getFunctionTy(llvmResTy, {llvmResTy},
-                                                    /*isVarArg=*/false);
+    auto llvmFnType = LLVM::LLVMFunctionType::get(ctx, llvmResTy, {llvmResTy},
+                                                  /*isVarArg=*/false);
     rewriter.create<LLVM::LLVMFuncOp>(loc, intrinsicName, llvmFnType);
   }
 

>From 51bbccad4f784a4c44d6562ccef36caaf2f1b521 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 16:18:27 +0530
Subject: [PATCH 13/27] Remove BOM character

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index ebb41e42a2871..0395f905c866b 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//===-- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

>From e5789b65fc43637493e07979a9ac56dfd9cbee37 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 16:29:05 +0530
Subject: [PATCH 14/27] Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5dfef939126d0..11f042737d658 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

>From 8937b12959c7a4336b6857c1a57b54e6c99d5457 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 16:31:14 +0530
Subject: [PATCH 15/27] Apply suggestion from @andykaylor

Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index eb9ac260f225d..35ba0f48ce6d8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -794,7 +794,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_sqrtpd512: {
     mlir::Location loc = getLoc(expr->getExprLoc());
     mlir::Value arg = ops[0];
-    return builder.create<cir::SqrtOp>(loc, arg.getType(), arg).getResult();
+    return cir::SqrtOp::create(builder, loc, arg.getType(), arg).getResult();
   }
   case X86::BI__builtin_ia32_pmuludq128:
   case X86::BI__builtin_ia32_pmuludq256:

>From 8a02c504acf42c81bd0c53df89e296480b74c05b Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 16:44:08 +0530
Subject: [PATCH 16/27] add description

---
 clang/include/clang/CIR/Dialect/IR/CIROps.td | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index fa10848f4397a..06eb7d6689362 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4647,6 +4647,19 @@ def CIR_SqrtOp : CIR_UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp"> {
   
   let description = [{
     Computes the square root of a floating-point value or vector.
+
+    The input must be either:
+      - a floating-point scalar type, or
+      - a vector whose element type is floating-point.
+
+    The result type must match the input type exactly.
+
+    Examples:
+      // scalar
+      %r = cir.sqrt %x : !cir.double
+
+      // vector
+      %v = cir.sqrt %vec : !cir.vector<!cir.float x 4>
   }];
 }
 

>From 82a9395517d79c79653194939c107234e1628d05 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 17:17:40 +0530
Subject: [PATCH 17/27] Remove undefined sqrt builtin cases

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 35ba0f48ce6d8..0b796e4e3a860 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -781,12 +781,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_sqrtsh_round_mask:
   case X86::BI__builtin_ia32_sqrtsd_round_mask:
   case X86::BI__builtin_ia32_sqrtss_round_mask:
-  case X86::BI__builtin_ia32_sqrtpd256:
-  case X86::BI__builtin_ia32_sqrtpd:
-  case X86::BI__builtin_ia32_sqrtps256:
-  case X86::BI__builtin_ia32_sqrtps:
-  case X86::BI__builtin_ia32_sqrtph256:
-  case X86::BI__builtin_ia32_sqrtph:
     errorNYI("Unimplemented builtin");
     return {};
   case X86::BI__builtin_ia32_sqrtph512:

>From 6bd328210bcd68abe14e36895f6d587a54b99ed2 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 17:25:40 +0530
Subject: [PATCH 18/27] Remove unused getLLVMIntrinsicNameForType function

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5dfef939126d0..8b8b756a7f691 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -46,16 +46,6 @@
 using namespace cir;
 using namespace llvm;
 
-
-static std::string getLLVMIntrinsicNameForType(mlir::Type llvmTy) {
-  std::string s;
-  {
-    llvm::raw_string_ostream os(s);
-    os << llvmTy;
-  }
-  return s;
-}
-
 // Actual lowering
 mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
     cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,

>From 8232ce8a4de0e8c179d42739b354695987be458f Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 17:37:11 +0530
Subject: [PATCH 19/27] Removed braces

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 8b8b756a7f691..4cbea38a30c50 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -62,9 +62,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
 
   Value operand = adaptor.getInput();
   Value llvmOperand = operand;
-  if (operand.getType() != llvmResTy) {
+  if (operand.getType() != llvmResTy)
     llvmOperand = rewriter.create<LLVM::BitcastOp>(loc, llvmResTy, operand);
-  }
 
   // Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result
   std::string intrinsicName = "llvm.sqrt.";
@@ -84,9 +83,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
         suffix = "v" + std::to_string(n) + "f16";
       else
         return op.emitOpError("unsupported float width for sqrt");
-    } else {
+    } else 
       return op.emitOpError("vector element must be floating point for sqrt");
-    }
   } else if (auto f = cirResTy.dyn_cast<cir::FloatType>()) {
     // Scalar float
     unsigned width = f.getWidth();
@@ -98,9 +96,8 @@ mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
       suffix = "f16";
     else
       return op.emitOpError("unsupported float width for sqrt");
-  } else {
+  } else 
     return op.emitOpError("unsupported type for cir.sqrt lowering");
-  }
 
   intrinsicName += suffix;
 

>From bc8e4ccfc22731aaee790659d8b5072ab36be7a7 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 17:39:45 +0530
Subject: [PATCH 20/27] Update
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 74 +------------------
 1 file changed, 4 insertions(+), 70 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 4cbea38a30c50..e7a6e8677569a 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -47,77 +47,11 @@ using namespace cir;
 using namespace llvm;
 
 // Actual lowering
-mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
-    cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
-    mlir::ConversionPatternRewriter &rewriter) const {
-
-  mlir::Location loc = op.getLoc();
-  mlir::MLIRContext *ctx = rewriter.getContext();
-
-  mlir::Type cirResTy = op.getResult().getType();
-  mlir::Type llvmResTy = getTypeConverter()->convertType(cirResTy);
-  if (!llvmResTy)
-    return op.emitOpError(
-        "expected LLVM dialect result type for cir.sqrt lowering");
-
-  Value operand = adaptor.getInput();
-  Value llvmOperand = operand;
-  if (operand.getType() != llvmResTy)
-    llvmOperand = rewriter.create<LLVM::BitcastOp>(loc, llvmResTy, operand);
-
-  // Build the llvm.sqrt.* intrinsic name depending on scalar vs vector result
-  std::string intrinsicName = "llvm.sqrt.";
-  std::string suffix;
-
-  // If the CIR result type is a vector, include the 'vN' part in the suffix.
-  if (auto vec = cirResTy.dyn_cast<cir::VectorType>()) {
-    Type elt = vec.getElementType();
-    if (auto f = elt.dyn_cast<cir::FloatType>()) {
-      unsigned width = f.getWidth();
-      unsigned n = vec.getNumElements();
-      if (width == 32)
-        suffix = "v" + std::to_string(n) + "f32";
-      else if (width == 64)
-        suffix = "v" + std::to_string(n) + "f64";
-      else if (width == 16)
-        suffix = "v" + std::to_string(n) + "f16";
-      else
-        return op.emitOpError("unsupported float width for sqrt");
-    } else 
-      return op.emitOpError("vector element must be floating point for sqrt");
-  } else if (auto f = cirResTy.dyn_cast<cir::FloatType>()) {
-    // Scalar float
-    unsigned width = f.getWidth();
-    if (width == 32)
-      suffix = "f32";
-    else if (width == 64)
-      suffix = "f64";
-    else if (width == 16)
-      suffix = "f16";
-    else
-      return op.emitOpError("unsupported float width for sqrt");
-  } else 
-    return op.emitOpError("unsupported type for cir.sqrt lowering");
-
-  intrinsicName += suffix;
-
-  // Ensure the llvm intrinsic function exists at module scope. Insert it at
-  // the start of the module body using an insertion guard.
-  ModuleOp module = op->getParentOfType<ModuleOp>();
-  if (!module.lookupSymbol<LLVM::LLVMFuncOp>(intrinsicName)) {
-    OpBuilder::InsertionGuard guard(rewriter);
-    rewriter.setInsertionPointToStart(module.getBody());
-    auto llvmFnType = LLVM::LLVMFunctionType::get(ctx, llvmResTy, {llvmResTy},
-                                                  /*isVarArg=*/false);
-    rewriter.create<LLVM::LLVMFuncOp>(loc, intrinsicName, llvmFnType);
-  }
-
-  // Create the call and replace cir.sqrt
-  auto callee = SymbolRefAttr::get(ctx, intrinsicName);
-  rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, llvmResTy, callee,
-                                            ArrayRef<Value>{llvmOperand});
-
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy,
+                                                    adaptor.getSrc());
   return mlir::success();
+
 }
 
 namespace cir {

>From 92847619f4b000f6dcefe307543dcf6e7b917a14 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 19:37:22 +0530
Subject: [PATCH 21/27] update
 clang\lib\CIR\Lowering\DirectToLLVM\LowerToLLVM.h

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 1f69b7d66f25e..0591de545b81d 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -17,19 +17,6 @@
 #include "mlir/Transforms/DialectConversion.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 
-namespace cir {
-class SqrtOp;
-}
-
-class CIRToLLVMSqrtOpLowering : public mlir::OpConversionPattern<cir::SqrtOp> {
-public:
-  using mlir::OpConversionPattern<cir::SqrtOp>::OpConversionPattern;
-
-  mlir::LogicalResult
-  matchAndRewrite(cir::SqrtOp op, typename cir::SqrtOp::Adaptor adaptor,
-                  mlir::ConversionPatternRewriter &rewriter) const override;
-};
-
 namespace cir {
 
 namespace direct {

>From 8647b5c719a7d91c3dbd3954b022621c3b550aaf Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 19:41:21 +0530
Subject: [PATCH 22/27] Update clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c

Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
 clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
index 97993cabf0ebf..bf496f2ea733d 100644
--- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -2,7 +2,11 @@
 // Test X86-specific sqrt builtins
 
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
-// RUN: FileCheck --input-file=%t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
 
 // Test __builtin_ia32_sqrtph512
 __m512h test_sqrtph512(__m512h a) {

>From 4bac65a58020456624d39efb64f27d1301c4bb23 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 19:48:11 +0530
Subject: [PATCH 23/27] Update test

---
 .../test/CIR/CodeGen/X86/cir-sqrt-builtins.c  | 30 +++++++++++++------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
index bf496f2ea733d..a3de192f9e142 100644
--- a/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/cir-sqrt-builtins.c
@@ -12,22 +12,34 @@
 __m512h test_sqrtph512(__m512h a) {
   return __builtin_ia32_sqrtph512(a);
 }
-// CHECK: cir.func @test_sqrtph512
-// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.fp16 x 32>
-// CHECK: cir.return [[RES]]
+// CIR: cir.func @test_sqrtph512
+// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.fp16 x 32>
+// CIR: cir.return [[RES]]
+// LLVM: define {{.*}} @test_sqrtph512
+// LLVM: call <32 x half> @llvm.sqrt.v32f16
+// OGCG: define {{.*}} @test_sqrtph512
+// OGCG: call <32 x half> @llvm.sqrt.v32f16
 
 // Test __builtin_ia32_sqrtps512
 __m512 test_sqrtps512(__m512 a) {
   return __builtin_ia32_sqrtps512(a);
 }
-// CHECK: cir.func @test_sqrtps512
-// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.float x 16>
-// CHECK: cir.return [[RES]]
+// CIR: cir.func @test_sqrtps512
+// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.float x 16>
+// CIR: cir.return [[RES]]
+// LLVM: define {{.*}} @test_sqrtps512
+// LLVM: call <16 x float> @llvm.sqrt.v16f32
+// OGCG: define {{.*}} @test_sqrtps512
+// OGCG: call <16 x float> @llvm.sqrt.v16f32
 
 // Test __builtin_ia32_sqrtpd512
 __m512d test_sqrtpd512(__m512d a) {
   return __builtin_ia32_sqrtpd512(a);
 }
-// CHECK: cir.func @test_sqrtpd512
-// CHECK: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.double x 8>
-// CHECK: cir.return [[RES]]
\ No newline at end of file
+// CIR: cir.func @test_sqrtpd512
+// CIR: [[RES:%.*]] = cir.sqrt {{%.*}} : !cir.vector<!cir.double x 8>
+// CIR: cir.return [[RES]]
+// LLVM: define {{.*}} @test_sqrtpd512
+// LLVM: call <8 x double> @llvm.sqrt.v8f64
+// OGCG: define {{.*}} @test_sqrtpd512
+// OGCG: call <8 x double> @llvm.sqrt.v8f64
\ No newline at end of file

>From b1ff2abd50b55470361f721053fc72a9080c20d6 Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Thu, 4 Dec 2025 20:25:55 +0530
Subject: [PATCH 24/27] update
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index e7a6e8677569a..846fc5c07f798 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

>From 884300615cd4900e44af48016cd895005821e41f Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Fri, 5 Dec 2025 19:06:33 +0530
Subject: [PATCH 25/27] Remove unused include

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 846fc5c07f798..08573c0ae83bb 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -30,7 +30,6 @@
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
 #include "mlir/Transforms/DialectConversion.h"
-#include "clang/Basic/LLVM.h"
 #include "clang/CIR/Dialect/IR/CIRAttrs.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"

>From ed82423252e2b452efc4d3265166c08e797b259e Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Fri, 5 Dec 2025 19:19:21 +0530
Subject: [PATCH 26/27] Move sqrt lowering with other floating point operations

---
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp    | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 08573c0ae83bb..34a1ca3f10c01 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -45,14 +45,6 @@
 using namespace cir;
 using namespace llvm;
 
-// Actual lowering
-  mlir::Type resTy = typeConverter->convertType(op.getType());
-  rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy,
-                                                    adaptor.getSrc());
-  return mlir::success();
-
-}
-
 namespace cir {
 namespace direct {
 
@@ -194,6 +186,16 @@ mlir::LogicalResult CIRToLLVMCopyOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+// Lower cir.sqrt to the LLVM dialect's sqrt op: convert the result type and
+// rebuild the operation on the adaptor's source operand.
+mlir::LogicalResult CIRToLLVMSqrtOpLowering::matchAndRewrite(
+    cir::SqrtOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::SqrtOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMCosOpLowering::matchAndRewrite(
     cir::CosOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {

>From 961c9f95a70b1368b2e33adeae0aa63fd2b9ae8c Mon Sep 17 00:00:00 2001
From: Priyanshu3820 <10b.priyanshu at gmail.com>
Date: Sat, 6 Dec 2025 09:31:10 +0530
Subject: [PATCH 27/27] Remove BOM character

---
 clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 34a1ca3f10c01..ffaebdcd9f062 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1,4 +1,4 @@
-//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
+//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.