[clang] [CIR] Add math and builtin intrinsics support (PR #175233)

via cfe-commits cfe-commits at lists.llvm.org
Tue Jan 13 14:08:25 PST 2026


https://github.com/adams381 updated https://github.com/llvm/llvm-project/pull/175233

From c382f2168de24fe5354cf9c8643ea27d31896801 Mon Sep 17 00:00:00 2001
From: Adam Smith <adams at nvidia.com>
Date: Wed, 7 Jan 2026 12:12:35 -0800
Subject: [PATCH 1/6] [CIR] Add floating-point math intrinsics and builtin
 support

This patch adds support for floating-point math intrinsics in CIR,
enabling math library calls to be lowered to LLVM intrinsics (a brief
before/after sketch follows the change summary below).

New CIR Operations (CIROps.td):
- CIR_LogOp, CIR_Log10Op, CIR_Log2Op: Logarithm operations
- CIR_SinOp, CIR_TanOp: Trigonometric operations
- CIR_NearbyintOp, CIR_RintOp, CIR_RoundOp, CIR_TruncOp: Rounding operations
- CIR_LroundOp, CIR_LLroundOp, CIR_LrintOp, CIR_LLrintOp: FP-to-int conversions
- CIR_CopysignOp: Sign copy operation
- CIR_FMaxNumOp, CIR_FMinNumOp: Min/max operations
- CIR_FModOp, CIR_PowOp: Arithmetic operations

CIRGenBuiltin.cpp changes:
- Add helper templates: emitUnaryMaybeConstrainedFPToIntBuiltin,
  emitBinaryFPBuiltin, emitBinaryMaybeConstrainedFPBuiltin
- Implement tryEmitFPMathIntrinsic cases for all new operations
- Add handling for predefined library functions (fabs, lround, etc.)
- Implement std::move, std::forward, std::move_if_noexcept, std::as_const builtins

CIRGenExpr.cpp changes:
- Fix hasAttributeNoBuiltin to default to false, matching incubator behavior
- This enables builtin recognition for predefined library functions

CIRGenModule.cpp changes:
- Add logic to skip noinline attribute for functions containing only
  builtin calls that become intrinsics, allowing proper optimization

LowerToLLVM.cpp changes:
- Add LLVM lowering patterns for all new CIR operations
- Each operation maps to its corresponding LLVM intrinsic

Test updates:
- builtin-floating-point.c: New comprehensive test from incubator covering
  all math intrinsics with CIR, LLVM, and OGCG checks
- libc.c: Update to expect cir.fabs intrinsic for fabs/fabsf
- builtin-fcmp-sse.c: Update CHECK patterns for new noinline behavior
- builtin-isfpclass.c: Update to expect cir.is_fp_class intrinsic
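
A minimal sketch of the intended lowering (abbreviated from the new
builtin-floating-point.c test; SSA value numbers are illustrative):

  // C source
  float my_sinf(float f) { return __builtin_sinf(f); }

  // CIR, before lowering
  %1 = cir.sin %0 : !cir.float

  // LLVM IR, after lowering
  %1 = call float @llvm.sin.f32(float %0)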
---
 clang/include/clang/CIR/Dialect/IR/CIROps.td  |  139 ++
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp       |   79 +-
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp          |    3 +-
 clang/lib/CIR/CodeGen/CIRGenModule.cpp        |   38 +
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp |  154 ++
 .../test/CIR/CodeGen/builtin-floating-point.c | 1636 +++++++++++++++++
 clang/test/CIR/CodeGen/libc.c                 |    4 +-
 .../CodeGenBuiltins/X86/avx512f16c-builtins.c |   18 +-
 .../CIR/CodeGenBuiltins/builtin-fcmp-sse.c    |   24 +-
 .../CIR/CodeGenBuiltins/builtin-isfpclass.c   |    4 +-
 10 files changed, 2070 insertions(+), 29 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/builtin-floating-point.c

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index e2b582c5c55df..9c6a764d4d85f 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -5156,6 +5156,96 @@ def CIR_Exp2Op : CIR_UnaryFPToFPBuiltinOp<"exp2", "Exp2Op"> {
   }];
 }
 
+def CIR_LogOp : CIR_UnaryFPToFPBuiltinOp<"log", "LogOp"> {
+  let summary = "Computes the floating-point natural logarithm";
+  let description = [{
+    `cir.log` computes the natural logarithm of a floating-point operand and
+    returns a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_Log10Op : CIR_UnaryFPToFPBuiltinOp<"log10", "Log10Op"> {
+  let summary = "Computes the floating-point base-10 logarithm";
+  let description = [{
+    `cir.log10` computes the base-10 logarithm of a floating-point operand and
+    returns a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_Log2Op : CIR_UnaryFPToFPBuiltinOp<"log2", "Log2Op"> {
+  let summary = "Computes the floating-point base-2 logarithm";
+  let description = [{
+    `cir.log2` computes the base-2 logarithm of a floating-point operand and
+    returns a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_NearbyintOp : CIR_UnaryFPToFPBuiltinOp<"nearbyint", "NearbyintOp"> {
+  let summary = "Rounds a floating-point value to an integral value";
+  let description = [{
+    `cir.nearbyint` rounds a floating-point operand to an integral value using
+    the current rounding mode and returns a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_RintOp : CIR_UnaryFPToFPBuiltinOp<"rint", "RintOp"> {
+  let summary = "Rounds a floating-point value to an integral value";
+  let description = [{
+    `cir.rint` rounds a floating-point operand to an integral value using the
+    current rounding mode and returns a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_RoundOp : CIR_UnaryFPToFPBuiltinOp<"round", "RoundOp"> {
+  let summary = "Rounds a floating-point value to the nearest integral value";
+  let description = [{
+    `cir.round` rounds a floating-point operand to the nearest integral value,
+    rounding halfway cases away from zero, and returns a result of the same
+    type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_SinOp : CIR_UnaryFPToFPBuiltinOp<"sin", "SinOp"> {
+  let summary = "Computes the floating-point sine";
+  let description = [{
+    `cir.sin` computes the sine of a floating-point operand and returns
+    a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_TanOp : CIR_UnaryFPToFPBuiltinOp<"tan", "TanOp"> {
+  let summary = "Computes the floating-point tangent";
+  let description = [{
+    `cir.tan` computes the tangent of a floating-point operand and returns
+    a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
+def CIR_TruncOp : CIR_UnaryFPToFPBuiltinOp<"trunc", "TruncOp"> {
+  let summary = "Truncates a floating-point value toward zero";
+  let description = [{
+    `cir.trunc` rounds a floating-point operand toward zero to an integral
+    value and returns a result of the same type.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
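+
+// Illustrative example (values hypothetical): the unary ops above all share
+// the same printed form, e.g.
+//   %1 = cir.log %0 : !cir.double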
+
 def CIR_FAbsOp : CIR_UnaryFPToFPBuiltinOp<"fabs", "FAbsOp"> {
   let summary = "Computes the floating-point absolute value";
   let description = [{
@@ -5183,6 +5273,55 @@ def CIR_FloorOp : CIR_UnaryFPToFPBuiltinOp<"floor", "FloorOp"> {
   }];
 }
 
+class CIR_UnaryFPToIntBuiltinOp<string mnemonic, string llvmOpName>
+    : CIR_Op<mnemonic, [Pure]> {
+  let arguments = (ins CIR_AnyFloatType:$src);
+  let results = (outs CIR_IntType:$result);
+
+  let summary = [{
+    Builtin function that takes a floating-point value as input and produces an
+    integral value as output.
+  }];
+
+  let assemblyFormat = [{
+    $src `:` type($src) `->` type($result) attr-dict
+  }];
+
+  let llvmOp = llvmOpName;
+}
+
+def CIR_LroundOp : CIR_UnaryFPToIntBuiltinOp<"lround", "LroundOp">;
+def CIR_LLroundOp : CIR_UnaryFPToIntBuiltinOp<"llround", "LlroundOp">;
+def CIR_LrintOp : CIR_UnaryFPToIntBuiltinOp<"lrint", "LrintOp">;
+def CIR_LLrintOp : CIR_UnaryFPToIntBuiltinOp<"llrint", "LlrintOp">;
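+
+// Illustrative example of the FP-to-int form (values hypothetical), cf. the
+// builtin-floating-point.c test added below:
+//   %1 = cir.lround %0 : !cir.float -> !s64i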
+
+class CIR_BinaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
+    : CIR_Op<mnemonic, [Pure, SameOperandsAndResultType]> {
+  let summary = [{
+    libc builtin equivalent ignoring floating-point exceptions and errno.
+  }];
+
+  let arguments = (ins
+    CIR_AnyFloatOrVecOfFloatType:$lhs,
+    CIR_AnyFloatOrVecOfFloatType:$rhs
+  );
+
+  let results = (outs CIR_AnyFloatOrVecOfFloatType:$result);
+
+  let assemblyFormat = [{
+    $lhs `,` $rhs `:` qualified(type($lhs)) attr-dict
+  }];
+
+  let llvmOp = llvmOpName;
+}
+
+def CIR_CopysignOp : CIR_BinaryFPToFPBuiltinOp<"copysign", "CopySignOp">;
+def CIR_FMaxNumOp : CIR_BinaryFPToFPBuiltinOp<"fmaxnum", "MaxNumOp">;
+def CIR_FMinNumOp : CIR_BinaryFPToFPBuiltinOp<"fminnum", "MinNumOp">;
+def CIR_FModOp : CIR_BinaryFPToFPBuiltinOp<"fmod", "FRemOp">;
+def CIR_PowOp : CIR_BinaryFPToFPBuiltinOp<"pow", "PowOp">;
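+
+// Illustrative example of the binary form (values hypothetical):
+//   %2 = cir.copysign %0, %1 : !cir.double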
+
 //===----------------------------------------------------------------------===//
 // Variadic Operations
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 85406e9f6488a..74aecbbb56f6e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -150,6 +150,45 @@ static RValue emitUnaryFPBuiltin(CIRGenFunction &cgf, const CallExpr &e) {
   return RValue::get(call->getResult(0));
 }
 
+template <typename Op>
+static RValue emitUnaryMaybeConstrainedFPToIntBuiltin(CIRGenFunction &cgf,
+                                                      const CallExpr &e) {
+  mlir::Type resultType = cgf.convertType(e.getType());
+  mlir::Value src = cgf.emitScalarExpr(e.getArg(0));
+
+  assert(!cir::MissingFeatures::fpConstraints());
+
+  auto call = Op::create(cgf.getBuilder(), src.getLoc(), resultType, src);
+  return RValue::get(call->getResult(0));
+}
+
+template <typename Op>
+static RValue emitBinaryFPBuiltin(CIRGenFunction &cgf, const CallExpr &e) {
+  mlir::Value arg0 = cgf.emitScalarExpr(e.getArg(0));
+  mlir::Value arg1 = cgf.emitScalarExpr(e.getArg(1));
+
+  mlir::Location loc = cgf.getLoc(e.getExprLoc());
+  mlir::Type ty = cgf.convertType(e.getType());
+  auto call = Op::create(cgf.getBuilder(), loc, ty, arg0, arg1);
+
+  return RValue::get(call->getResult(0));
+}
+
+template <typename Op>
+static mlir::Value emitBinaryMaybeConstrainedFPBuiltin(CIRGenFunction &cgf,
+                                                       const CallExpr &e) {
+  mlir::Value arg0 = cgf.emitScalarExpr(e.getArg(0));
+  mlir::Value arg1 = cgf.emitScalarExpr(e.getArg(1));
+
+  mlir::Location loc = cgf.getLoc(e.getExprLoc());
+  mlir::Type ty = cgf.convertType(e.getType());
+
+  assert(!cir::MissingFeatures::fpConstraints());
+
+  auto call = Op::create(cgf.getBuilder(), loc, ty, arg0, arg1);
+  return call->getResult(0);
+}
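+
+// Illustrative use of the helper above (mirrors the pow case added below):
+//   return RValue::get(
+//       emitBinaryMaybeConstrainedFPBuiltin<cir::PowOp>(cgf, *e));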
+
 static RValue errorBuiltinNYI(CIRGenFunction &cgf, const CallExpr *e,
                               unsigned builtinID) {
 
@@ -301,6 +340,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_ceilf128:
     return emitUnaryMaybeConstrainedFPBuiltin<cir::CeilOp>(cgf, *e);
   case Builtin::BI__builtin_elementwise_ceil:
+    return RValue::getIgnored();
   case Builtin::BIcopysign:
   case Builtin::BIcopysignf:
   case Builtin::BIcopysignl:
@@ -309,7 +349,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_copysignf16:
   case Builtin::BI__builtin_copysignl:
   case Builtin::BI__builtin_copysignf128:
-    return RValue::getIgnored();
+    return emitBinaryFPBuiltin<cir::CopysignOp>(cgf, *e);
   case Builtin::BIcos:
   case Builtin::BIcosf:
   case Builtin::BIcosl:
@@ -386,6 +426,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_fmal:
   case Builtin::BI__builtin_fmaf128:
   case Builtin::BI__builtin_elementwise_fma:
+    return RValue::getIgnored();
   case Builtin::BIfmax:
   case Builtin::BIfmaxf:
   case Builtin::BIfmaxl:
@@ -394,6 +435,8 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_fmaxf16:
   case Builtin::BI__builtin_fmaxl:
   case Builtin::BI__builtin_fmaxf128:
+    return RValue::get(
+        emitBinaryMaybeConstrainedFPBuiltin<cir::FMaxNumOp>(cgf, *e));
   case Builtin::BIfmin:
   case Builtin::BIfminf:
   case Builtin::BIfminl:
@@ -402,6 +445,8 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_fminf16:
   case Builtin::BI__builtin_fminl:
   case Builtin::BI__builtin_fminf128:
+    return RValue::get(
+        emitBinaryMaybeConstrainedFPBuiltin<cir::FMinNumOp>(cgf, *e));
   case Builtin::BIfmaximum_num:
   case Builtin::BIfmaximum_numf:
   case Builtin::BIfmaximum_numl:
@@ -418,6 +463,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_fminimum_numf16:
   case Builtin::BI__builtin_fminimum_numl:
   case Builtin::BI__builtin_fminimum_numf128:
+    return RValue::getIgnored();
   case Builtin::BIfmod:
   case Builtin::BIfmodf:
   case Builtin::BIfmodl:
@@ -426,7 +472,9 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_fmodf16:
   case Builtin::BI__builtin_fmodl:
   case Builtin::BI__builtin_fmodf128:
+    return emitBinaryFPBuiltin<cir::FModOp>(cgf, *e);
   case Builtin::BI__builtin_elementwise_fmod:
+    return RValue::getIgnored();
   case Builtin::BIlog:
   case Builtin::BIlogf:
   case Builtin::BIlogl:
@@ -436,6 +484,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_logl:
   case Builtin::BI__builtin_logf128:
   case Builtin::BI__builtin_elementwise_log:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::LogOp>(cgf, *e);
   case Builtin::BIlog10:
   case Builtin::BIlog10f:
   case Builtin::BIlog10l:
@@ -445,6 +494,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_log10l:
   case Builtin::BI__builtin_log10f128:
   case Builtin::BI__builtin_elementwise_log10:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::Log10Op>(cgf, *e);
   case Builtin::BIlog2:
   case Builtin::BIlog2f:
   case Builtin::BIlog2l:
@@ -454,6 +504,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_log2l:
   case Builtin::BI__builtin_log2f128:
   case Builtin::BI__builtin_elementwise_log2:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::Log2Op>(cgf, *e);
   case Builtin::BInearbyint:
   case Builtin::BInearbyintf:
   case Builtin::BInearbyintl:
@@ -462,6 +513,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_nearbyintl:
   case Builtin::BI__builtin_nearbyintf128:
   case Builtin::BI__builtin_elementwise_nearbyint:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::NearbyintOp>(cgf, *e);
   case Builtin::BIpow:
   case Builtin::BIpowf:
   case Builtin::BIpowl:
@@ -470,7 +522,10 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_powf16:
   case Builtin::BI__builtin_powl:
   case Builtin::BI__builtin_powf128:
+    return RValue::get(
+        emitBinaryMaybeConstrainedFPBuiltin<cir::PowOp>(cgf, *e));
   case Builtin::BI__builtin_elementwise_pow:
+    return RValue::getIgnored();
   case Builtin::BIrint:
   case Builtin::BIrintf:
   case Builtin::BIrintl:
@@ -480,6 +535,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_rintl:
   case Builtin::BI__builtin_rintf128:
   case Builtin::BI__builtin_elementwise_rint:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::RintOp>(cgf, *e);
   case Builtin::BIround:
   case Builtin::BIroundf:
   case Builtin::BIroundl:
@@ -489,6 +545,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_roundl:
   case Builtin::BI__builtin_roundf128:
   case Builtin::BI__builtin_elementwise_round:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::RoundOp>(cgf, *e);
   case Builtin::BIroundeven:
   case Builtin::BIroundevenf:
   case Builtin::BIroundevenl:
@@ -498,6 +555,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_roundevenl:
   case Builtin::BI__builtin_roundevenf128:
   case Builtin::BI__builtin_elementwise_roundeven:
+    return RValue::getIgnored();
   case Builtin::BIsin:
   case Builtin::BIsinf:
   case Builtin::BIsinl:
@@ -507,6 +565,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_sinl:
   case Builtin::BI__builtin_sinf128:
   case Builtin::BI__builtin_elementwise_sin:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::SinOp>(cgf, *e);
   case Builtin::BIsinh:
   case Builtin::BIsinhf:
   case Builtin::BIsinhl:
@@ -527,6 +586,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_sincosf16:
   case Builtin::BI__builtin_sincosl:
   case Builtin::BI__builtin_sincosf128:
+    return RValue::getIgnored();
   case Builtin::BIsqrt:
   case Builtin::BIsqrtf:
   case Builtin::BIsqrtl:
@@ -536,6 +596,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_sqrtl:
   case Builtin::BI__builtin_sqrtf128:
   case Builtin::BI__builtin_elementwise_sqrt:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::SqrtOp>(cgf, *e);
   case Builtin::BItan:
   case Builtin::BItanf:
   case Builtin::BItanl:
@@ -545,6 +606,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_tanl:
   case Builtin::BI__builtin_tanf128:
   case Builtin::BI__builtin_elementwise_tan:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::TanOp>(cgf, *e);
   case Builtin::BItanh:
   case Builtin::BItanhf:
   case Builtin::BItanhl:
@@ -554,6 +616,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_tanhl:
   case Builtin::BI__builtin_tanhf128:
   case Builtin::BI__builtin_elementwise_tanh:
+    return RValue::getIgnored();
   case Builtin::BItrunc:
   case Builtin::BItruncf:
   case Builtin::BItruncl:
@@ -563,6 +626,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_truncl:
   case Builtin::BI__builtin_truncf128:
   case Builtin::BI__builtin_elementwise_trunc:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::TruncOp>(cgf, *e);
   case Builtin::BIlround:
   case Builtin::BIlroundf:
   case Builtin::BIlroundl:
@@ -570,6 +634,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_lroundf:
   case Builtin::BI__builtin_lroundl:
   case Builtin::BI__builtin_lroundf128:
+    return emitUnaryMaybeConstrainedFPToIntBuiltin<cir::LroundOp>(cgf, *e);
   case Builtin::BIllround:
   case Builtin::BIllroundf:
   case Builtin::BIllroundl:
@@ -577,6 +642,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_llroundf:
   case Builtin::BI__builtin_llroundl:
   case Builtin::BI__builtin_llroundf128:
+    return emitUnaryMaybeConstrainedFPToIntBuiltin<cir::LLroundOp>(cgf, *e);
   case Builtin::BIlrint:
   case Builtin::BIlrintf:
   case Builtin::BIlrintl:
@@ -584,6 +650,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_lrintf:
   case Builtin::BI__builtin_lrintl:
   case Builtin::BI__builtin_lrintf128:
+    return emitUnaryMaybeConstrainedFPToIntBuiltin<cir::LrintOp>(cgf, *e);
   case Builtin::BIllrint:
   case Builtin::BIllrintf:
   case Builtin::BIllrintl:
@@ -591,6 +658,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_llrintf:
   case Builtin::BI__builtin_llrintl:
   case Builtin::BI__builtin_llrintf128:
+    return emitUnaryMaybeConstrainedFPToIntBuiltin<cir::LLrintOp>(cgf, *e);
   case Builtin::BI__builtin_ldexp:
   case Builtin::BI__builtin_ldexpf:
   case Builtin::BI__builtin_ldexpl:
@@ -1605,8 +1673,9 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
   case Builtin::BImove:
   case Builtin::BImove_if_noexcept:
   case Builtin::BIforward:
-  case Builtin::BIforward_like:
   case Builtin::BIas_const:
+    return RValue::get(emitLValue(e->getArg(0)).getPointer());
+  case Builtin::BIforward_like:
   case Builtin::BI__GetExceptionInfo:
   case Builtin::BI__fastfail:
   case Builtin::BIread_pipe:
@@ -1663,6 +1732,12 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
     return emitLibraryCall(*this, fd, e,
                            cgm.getBuiltinLibFunction(fd, builtinID));
 
+  // If this is a predefined lib function (e.g. malloc), emit the call
+  // using exactly the normal call path.
+  if (getContext().BuiltinInfo.isPredefinedLibFunction(builtinID))
+    return emitLibraryCall(*this, fd, e,
+                           emitScalarExpr(e->getCallee()).getDefiningOp());
+
   // Some target-specific builtins can have aggregate return values, e.g.
   // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
   // returnValue to be non-null, so that the target-specific emission code can
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index cd13498e3702f..0e46faf7077c3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -1869,8 +1869,7 @@ CIRGenCallee CIRGenFunction::emitDirectCallee(const GlobalDecl &gd) {
 
     bool isPredefinedLibFunction =
         cgm.getASTContext().BuiltinInfo.isPredefinedLibFunction(builtinID);
-    // Assume nobuiltins everywhere until we actually read the attributes.
-    bool hasAttributeNoBuiltin = true;
+    bool hasAttributeNoBuiltin = false;
     assert(!cir::MissingFeatures::attributeNoBuiltin());
 
     // When directing calling an inline builtin, call it through it's mangled
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 42df6304628dc..adb2437827fe4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -20,6 +20,7 @@
 #include "clang/AST/DeclOpenACC.h"
 #include "clang/AST/GlobalDecl.h"
 #include "clang/AST/RecordLayout.h"
+#include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/CIR/Dialect/IR/CIRAttrs.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
@@ -2196,7 +2197,44 @@ void CIRGenModule::setCIRFunctionAttributesForDefinition(
   } else if (codeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) {
     // If inlining is disabled, force everything that isn't always_inline
     // to carry an explicit noinline attribute.
+    // However, don't mark functions as noinline if they only contain
+    // builtin calls that will become intrinsics - these simple wrappers
+    // should be allowed to inline so the intrinsics can be optimized.
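+    // For instance (illustrative), a wrapper such as
+    //   double wrap(double x) { return __builtin_sqrt(x); }
+    // should remain inlinable so the resulting llvm.sqrt call can be folded.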
     if (!isAlwaysInline) {
+      // Check if this function contains any builtin calls that will become
+      // intrinsics. If so, don't mark as noinline - let the optimizer handle
+      // it.
+      if (auto *fd = dyn_cast<FunctionDecl>(decl)) {
+        if (const Stmt *body = fd->getBody()) {
+          // Walk the function body to find any builtin calls
+          struct BuiltinCallFinder : public StmtVisitor<BuiltinCallFinder> {
+            bool foundBuiltin = false;
+            void VisitCallExpr(CallExpr *CE) {
+              if (auto *callee = CE->getDirectCallee())
+                if (callee->getBuiltinID())
+                  foundBuiltin = true;
+              for (auto *child : CE->children()) {
+                if (child)
+                  Visit(child);
+              }
+            }
+            void VisitStmt(Stmt *S) {
+              if (foundBuiltin)
+                return;
+              for (auto *child : S->children()) {
+                if (child)
+                  Visit(child);
+              }
+            }
+          };
+          BuiltinCallFinder finder;
+          finder.Visit(const_cast<Stmt *>(body));
+          // If the function contains a builtin call that will become an
+          // intrinsic, don't mark it noinline.
+          if (finder.foundBuiltin)
+            return;
+        }
+      }
       f.setInlineKind(cir::InlineKind::NoInline);
     }
   } else {
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index d0fa8bae545be..309565b3b46ec 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -218,6 +218,80 @@ mlir::LogicalResult CIRToLLVMExp2OpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+mlir::LogicalResult CIRToLLVMLogOpLowering::matchAndRewrite(
+    cir::LogOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::LogOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMLog10OpLowering::matchAndRewrite(
+    cir::Log10Op op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::Log10Op>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMLog2OpLowering::matchAndRewrite(
+    cir::Log2Op op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::Log2Op>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMNearbyintOpLowering::matchAndRewrite(
+    cir::NearbyintOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::NearbyintOp>(op, resTy,
+                                                       adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMRintOpLowering::matchAndRewrite(
+    cir::RintOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::RintOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMRoundOpLowering::matchAndRewrite(
+    cir::RoundOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::RoundOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMSinOpLowering::matchAndRewrite(
+    cir::SinOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::SinOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMTanOpLowering::matchAndRewrite(
+    cir::TanOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::TanOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMTruncOpLowering::matchAndRewrite(
+    cir::TruncOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::FTruncOp>(op, resTy,
+                                                    adaptor.getSrc());
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMFloorOpLowering::matchAndRewrite(
     cir::FloorOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -1544,6 +1618,86 @@ mlir::LogicalResult CIRToLLVMCeilOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+mlir::LogicalResult CIRToLLVMCopysignOpLowering::matchAndRewrite(
+    cir::CopysignOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::CopySignOp>(
+      op, resTy, adaptor.getLhs(), adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMFMaxNumOpLowering::matchAndRewrite(
+    cir::FMaxNumOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::MaxNumOp>(op, resTy, adaptor.getLhs(),
+                                                    adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMFMinNumOpLowering::matchAndRewrite(
+    cir::FMinNumOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::MinNumOp>(op, resTy, adaptor.getLhs(),
+                                                    adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMFModOpLowering::matchAndRewrite(
+    cir::FModOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::FRemOp>(op, resTy, adaptor.getLhs(),
+                                                  adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMPowOpLowering::matchAndRewrite(
+    cir::PowOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::PowOp>(op, resTy, adaptor.getLhs(),
+                                                 adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMLroundOpLowering::matchAndRewrite(
+    cir::LroundOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::LroundOp>(op, resTy,
+                                                    adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMLLroundOpLowering::matchAndRewrite(
+    cir::LLroundOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::LlroundOp>(op, resTy,
+                                                     adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMLrintOpLowering::matchAndRewrite(
+    cir::LrintOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::LrintOp>(op, resTy, adaptor.getSrc());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMLLrintOpLowering::matchAndRewrite(
+    cir::LLrintOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::LlrintOp>(op, resTy,
+                                                    adaptor.getSrc());
+  return mlir::success();
+}
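+
+// Note: each pattern above rewrites a CIR math op one-for-one onto the
+// matching LLVM dialect intrinsic (e.g. cir.lround -> llvm.intr.lround),
+// which translates to the corresponding @llvm.lround.* call in LLVM IR.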
+
 mlir::LogicalResult CIRToLLVMAllocaOpLowering::matchAndRewrite(
     cir::AllocaOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
diff --git a/clang/test/CIR/CodeGen/builtin-floating-point.c b/clang/test/CIR/CodeGen/builtin-floating-point.c
new file mode 100644
index 0000000000000..e2c7fddde9408
--- /dev/null
+++ b/clang/test/CIR/CodeGen/builtin-floating-point.c
@@ -0,0 +1,1636 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t1.cir 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-apple-darwin-macho -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t1.cir 2>&1 | FileCheck %s --check-prefix=AARCH64
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+// lround
+
+long my_lroundf(float f) {
+  return __builtin_lroundf(f);
+  // CHECK: cir.func dso_local @my_lroundf
+  // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @my_lroundf
+  // LLVM:   %{{.+}} = call i64 @llvm.lround.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long my_lround(double f) {
+  return __builtin_lround(f);
+  // CHECK: cir.func dso_local @my_lround
+  // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @my_lround
+  // LLVM:   %{{.+}} = call i64 @llvm.lround.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long my_lroundl(long double f) {
+  return __builtin_lroundl(f);
+  // CHECK: cir.func dso_local @my_lroundl
+  // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.lround %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @my_lroundl
+  // LLVM:   %{{.+}} = call i64 @llvm.lround.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+long lroundf(float);
+long lround(double);
+long lroundl(long double);
+
+long call_lroundf(float f) {
+  return lroundf(f);
+  // CHECK: cir.func dso_local @call_lroundf
+  // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @call_lroundf
+  // LLVM:   %{{.+}} = call i64 @llvm.lround.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long call_lround(double f) {
+  return lround(f);
+  // CHECK: cir.func dso_local @call_lround
+  // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @call_lround
+  // LLVM:   %{{.+}} = call i64 @llvm.lround.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long call_lroundl(long double f) {
+  return lroundl(f);
+  // CHECK: cir.func dso_local @call_lroundl
+  // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.lround %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @call_lroundl
+  // LLVM:   %{{.+}} = call i64 @llvm.lround.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// llround
+
+long long my_llroundf(float f) {
+  return __builtin_llroundf(f);
+  // CHECK: cir.func dso_local @my_llroundf
+  // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @my_llroundf
+  // LLVM:   %{{.+}} = call i64 @llvm.llround.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long long my_llround(double f) {
+  return __builtin_llround(f);
+  // CHECK: cir.func dso_local @my_llround
+  // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @my_llround
+  // LLVM:   %{{.+}} = call i64 @llvm.llround.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long long my_llroundl(long double f) {
+  return __builtin_llroundl(f);
+  // CHECK: cir.func dso_local @my_llroundl
+  // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.llround %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @my_llroundl
+  // LLVM:   %{{.+}} = call i64 @llvm.llround.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+long long llroundf(float);
+long long llround(double);
+long long llroundl(long double);
+
+long long call_llroundf(float f) {
+  return llroundf(f);
+  // CHECK: cir.func dso_local @call_llroundf
+  // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @call_llroundf
+  // LLVM:   %{{.+}} = call i64 @llvm.llround.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long long call_llround(double f) {
+  return llround(f);
+  // CHECK: cir.func dso_local @call_llround
+  // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @call_llround
+  // LLVM:   %{{.+}} = call i64 @llvm.llround.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long long call_llroundl(long double f) {
+  return llroundl(f);
+  // CHECK: cir.func dso_local @call_llroundl
+  // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.llround %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @call_llroundl
+  // LLVM:   %{{.+}} = call i64 @llvm.llround.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// lrint
+
+long my_lrintf(float f) {
+  return __builtin_lrintf(f);
+  // CHECK: cir.func dso_local @my_lrintf
+  // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @my_lrintf
+  // LLVM:   %{{.+}} = call i64 @llvm.lrint.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long my_lrint(double f) {
+  return __builtin_lrint(f);
+  // CHECK: cir.func dso_local @my_lrint
+  // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @my_lrint
+  // LLVM:   %{{.+}} = call i64 @llvm.lrint.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long my_lrintl(long double f) {
+  return __builtin_lrintl(f);
+  // CHECK: cir.func dso_local @my_lrintl
+  // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.lrint %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @my_lrintl
+  // LLVM:   %{{.+}} = call i64 @llvm.lrint.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+long lrintf(float);
+long lrint(double);
+long lrintl(long double);
+
+long call_lrintf(float f) {
+  return lrintf(f);
+  // CHECK: cir.func dso_local @call_lrintf
+  // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @call_lrintf
+  // LLVM:   %{{.+}} = call i64 @llvm.lrint.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long call_lrint(double f) {
+  return lrint(f);
+  // CHECK: cir.func dso_local @call_lrint
+  // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @call_lrint
+  // LLVM:   %{{.+}} = call i64 @llvm.lrint.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long call_lrintl(long double f) {
+  return lrintl(f);
+  // CHECK: cir.func dso_local @call_lrintl
+  // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.lrint %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @call_lrintl
+  // LLVM:   %{{.+}} = call i64 @llvm.lrint.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// llrint
+
+long long my_llrintf(float f) {
+  return __builtin_llrintf(f);
+  // CHECK: cir.func dso_local @my_llrintf
+  // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @my_llrintf
+  // LLVM:   %{{.+}} = call i64 @llvm.llrint.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long long my_llrint(double f) {
+  return __builtin_llrint(f);
+  // CHECK: cir.func dso_local @my_llrint
+  // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @my_llrint
+  // LLVM:   %{{.+}} = call i64 @llvm.llrint.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long long my_llrintl(long double f) {
+  return __builtin_llrintl(f);
+  // CHECK: cir.func dso_local @my_llrintl
+  // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.llrint %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @my_llrintl
+  // LLVM:   %{{.+}} = call i64 @llvm.llrint.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+long long llrintf(float);
+long long llrint(double);
+long long llrintl(long double);
+
+long long call_llrintf(float f) {
+  return llrintf(f);
+  // CHECK: cir.func dso_local @call_llrintf
+  // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @call_llrintf
+  // LLVM:   %{{.+}} = call i64 @llvm.llrint.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long long call_llrint(double f) {
+  return llrint(f);
+  // CHECK: cir.func dso_local @call_llrint
+  // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @call_llrint
+  // LLVM:   %{{.+}} = call i64 @llvm.llrint.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long long call_llrintl(long double f) {
+  return llrintl(f);
+  // CHECK: cir.func dso_local @call_llrintl
+  // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.llrint %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @call_llrintl
+  // LLVM:   %{{.+}} = call i64 @llvm.llrint.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// ceil
+
+float my_ceilf(float f) {
+  return __builtin_ceilf(f);
+  // CHECK: cir.func dso_local @my_ceilf
+  // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_ceilf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.ceil.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_ceil(double f) {
+  return __builtin_ceil(f);
+  // CHECK: cir.func dso_local @my_ceil
+  // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_ceil(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.ceil.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_ceill(long double f) {
+  return __builtin_ceill(f);
+  // CHECK: cir.func dso_local @my_ceill
+  // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.ceil {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_ceill(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.ceil.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float ceilf(float);
+double ceil(double);
+long double ceill(long double);
+
+float call_ceilf(float f) {
+  return ceilf(f);
+  // CHECK: cir.func dso_local @call_ceilf
+  // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_ceilf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.ceil.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_ceil(double f) {
+  return ceil(f);
+  // CHECK: cir.func dso_local @call_ceil
+  // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_ceil(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.ceil.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_ceill(long double f) {
+  return ceill(f);
+  // CHECK: cir.func dso_local @call_ceill
+  // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.ceil {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_ceill(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.ceil.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// cos
+
+float my_cosf(float f) {
+  return __builtin_cosf(f);
+  // CHECK: cir.func dso_local @my_cosf
+  // CHECK: {{.+}} = cir.cos {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_cosf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.cos.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_cos(double f) {
+  return __builtin_cos(f);
+  // CHECK: cir.func dso_local @my_cos
+  // CHECK: {{.+}} = cir.cos {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_cos(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.cos.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_cosl(long double f) {
+  return __builtin_cosl(f);
+  // CHECK: cir.func dso_local @my_cosl
+  // CHECK: {{.+}} = cir.cos {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.cos {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_cosl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.cos.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float cosf(float);
+double cos(double);
+long double cosl(long double);
+
+float call_cosf(float f) {
+  return cosf(f);
+  // CHECK: cir.func dso_local @call_cosf
+  // CHECK: {{.+}} = cir.cos {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_cosf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.cos.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_cos(double f) {
+  return cos(f);
+  // CHECK: cir.func dso_local @call_cos
+  // CHECK: {{.+}} = cir.cos {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_cos(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.cos.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_cosl(long double f) {
+  return cosl(f);
+  // CHECK: cir.func dso_local @call_cosl
+  // CHECK: {{.+}} = cir.cos {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.cos {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_cosl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.cos.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// exp
+
+float my_expf(float f) {
+  return __builtin_expf(f);
+  // CHECK: cir.func dso_local @my_expf
+  // CHECK: {{.+}} = cir.exp {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_expf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.exp.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_exp(double f) {
+  return __builtin_exp(f);
+  // CHECK: cir.func dso_local @my_exp
+  // CHECK: {{.+}} = cir.exp {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_exp(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.exp.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_expl(long double f) {
+  return __builtin_expl(f);
+  // CHECK: cir.func dso_local @my_expl
+  // CHECK: {{.+}} = cir.exp {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.exp {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_expl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.exp.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float expf(float);
+double exp(double);
+long double expl(long double);
+
+float call_expf(float f) {
+  return expf(f);
+  // CHECK: cir.func dso_local @call_expf
+  // CHECK: {{.+}} = cir.exp {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_expf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.exp.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_exp(double f) {
+  return exp(f);
+  // CHECK: cir.func dso_local @call_exp
+  // CHECK: {{.+}} = cir.exp {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_exp(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.exp.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_expl(long double f) {
+  return expl(f);
+  // CHECK: cir.func dso_local @call_expl
+  // CHECK: {{.+}} = cir.exp {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.exp {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_expl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.exp.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// exp2
+
+float my_exp2f(float f) {
+  return __builtin_exp2f(f);
+  // CHECK: cir.func dso_local @my_exp2f
+  // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_exp2f(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.exp2.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_exp2(double f) {
+  return __builtin_exp2(f);
+  // CHECK: cir.func dso_local @my_exp2
+  // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_exp2(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.exp2.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_exp2l(long double f) {
+  return __builtin_exp2l(f);
+  // CHECK: cir.func dso_local @my_exp2l
+  // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.exp2 {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_exp2l(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.exp2.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float exp2f(float);
+double exp2(double);
+long double exp2l(long double);
+
+float call_exp2f(float f) {
+  return exp2f(f);
+  // CHECK: cir.func dso_local @call_exp2f
+  // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_exp2f(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.exp2.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_exp2(double f) {
+  return exp2(f);
+  // CHECK: cir.func dso_local @call_exp2
+  // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_exp2(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.exp2.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_exp2l(long double f) {
+  return exp2l(f);
+  // CHECK: cir.func dso_local @call_exp2l
+  // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.exp2 {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_exp2l(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.exp2.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// floor
+
+float my_floorf(float f) {
+  return __builtin_floorf(f);
+  // CHECK: cir.func dso_local @my_floorf
+  // CHECK: {{.+}} = cir.floor {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_floorf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.floor.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_floor(double f) {
+  return __builtin_floor(f);
+  // CHECK: cir.func dso_local @my_floor
+  // CHECK: {{.+}} = cir.floor {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_floor(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.floor.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_floorl(long double f) {
+  return __builtin_floorl(f);
+  // CHECK: cir.func dso_local @my_floorl
+  // CHECK: {{.+}} = cir.floor {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.floor {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_floorl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.floor.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float floorf(float);
+double floor(double);
+long double floorl(long double);
+
+float call_floorf(float f) {
+  return floorf(f);
+  // CHECK: cir.func dso_local @call_floorf
+  // CHECK: {{.+}} = cir.floor {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_floorf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.floor.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_floor(double f) {
+  return floor(f);
+  // CHECK: cir.func dso_local @call_floor
+  // CHECK: {{.+}} = cir.floor {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_floor(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.floor.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_floorl(long double f) {
+  return floorl(f);
+  // CHECK: cir.func dso_local @call_floorl
+  // CHECK: {{.+}} = cir.floor {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.floor {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_floorl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.floor.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// log
+
+float my_logf(float f) {
+  return __builtin_logf(f);
+  // CHECK: cir.func dso_local @my_logf
+  // CHECK: {{.+}} = cir.log {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_logf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.log.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_log(double f) {
+  return __builtin_log(f);
+  // CHECK: cir.func dso_local @my_log
+  // CHECK: {{.+}} = cir.log {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_log(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.log.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_logl(long double f) {
+  return __builtin_logl(f);
+  // CHECK: cir.func dso_local @my_logl
+  // CHECK: {{.+}} = cir.log {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.log {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_logl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.log.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float logf(float);
+double log(double);
+long double logl(long double);
+
+float call_logf(float f) {
+  return logf(f);
+  // CHECK: cir.func dso_local @call_logf
+  // CHECK: {{.+}} = cir.log {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_logf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.log.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_log(double f) {
+  return log(f);
+  // CHECK: cir.func dso_local @call_log
+  // CHECK: {{.+}} = cir.log {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_log(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.log.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_logl(long double f) {
+  return logl(f);
+  // CHECK: cir.func dso_local @call_logl
+  // CHECK: {{.+}} = cir.log {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.log {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_logl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.log.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// log10
+
+float my_log10f(float f) {
+  return __builtin_log10f(f);
+  // CHECK: cir.func dso_local @my_log10f
+  // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_log10f(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.log10.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_log10(double f) {
+  return __builtin_log10(f);
+  // CHECK: cir.func dso_local @my_log10
+  // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_log10(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.log10.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_log10l(long double f) {
+  return __builtin_log10l(f);
+  // CHECK: cir.func dso_local @my_log10l
+  // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.log10 {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_log10l(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.log10.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float log10f(float);
+double log10(double);
+long double log10l(long double);
+
+float call_log10f(float f) {
+  return log10f(f);
+  // CHECK: cir.func dso_local @call_log10f
+  // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_log10f(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.log10.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_log10(double f) {
+  return log10(f);
+  // CHECK: cir.func dso_local @call_log10
+  // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_log10(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.log10.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_log10l(long double f) {
+  return log10l(f);
+  // CHECK: cir.func dso_local @call_log10l
+  // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.log10 {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_log10l(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.log10.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// log2
+
+float my_log2f(float f) {
+  return __builtin_log2f(f);
+  // CHECK: cir.func dso_local @my_log2f
+  // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_log2f(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.log2.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_log2(double f) {
+  return __builtin_log2(f);
+  // CHECK: cir.func dso_local @my_log2
+  // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_log2(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.log2.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_log2l(long double f) {
+  return __builtin_log2l(f);
+  // CHECK: cir.func dso_local @my_log2l
+  // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.log2 {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_log2l(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.log2.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float log2f(float);
+double log2(double);
+long double log2l(long double);
+
+float call_log2f(float f) {
+  return log2f(f);
+  // CHECK: cir.func dso_local @call_log2f
+  // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_log2f(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.log2.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_log2(double f) {
+  return log2(f);
+  // CHECK: cir.func dso_local @call_log2
+  // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_log2(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.log2.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_log2l(long double f) {
+  return log2l(f);
+  // CHECK: cir.func dso_local @call_log2l
+  // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.log2 {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_log2l(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.log2.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// nearbyint
+
+float my_nearbyintf(float f) {
+  return __builtin_nearbyintf(f);
+  // CHECK: cir.func dso_local @my_nearbyintf
+  // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_nearbyintf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.nearbyint.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_nearbyint(double f) {
+  return __builtin_nearbyint(f);
+  // CHECK: cir.func dso_local @my_nearbyint
+  // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_nearbyint(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.nearbyint.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_nearbyintl(long double f) {
+  return __builtin_nearbyintl(f);
+  // CHECK: cir.func dso_local @my_nearbyintl
+  // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.nearbyint {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_nearbyintl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.nearbyint.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float nearbyintf(float);
+double nearbyint(double);
+long double nearbyintl(long double);
+
+float call_nearbyintf(float f) {
+  return nearbyintf(f);
+  // CHECK: cir.func dso_local @call_nearbyintf
+  // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_nearbyintf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.nearbyint.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_nearbyint(double f) {
+  return nearbyint(f);
+  // CHECK: cir.func dso_local @call_nearbyint
+  // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_nearbyint(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.nearbyint.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_nearbyintl(long double f) {
+  return nearbyintl(f);
+  // CHECK: cir.func dso_local @call_nearbyintl
+  // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.nearbyint {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_nearbyintl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.nearbyint.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// rint
+
+float my_rintf(float f) {
+  return __builtin_rintf(f);
+  // CHECK: cir.func dso_local @my_rintf
+  // CHECK: {{.+}} = cir.rint {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_rintf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.rint.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_rint(double f) {
+  return __builtin_rint(f);
+  // CHECK: cir.func dso_local @my_rint
+  // CHECK: {{.+}} = cir.rint {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_rint(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.rint.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_rintl(long double f) {
+  return __builtin_rintl(f);
+  // CHECK: cir.func dso_local @my_rintl
+  // CHECK: {{.+}} = cir.rint {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.rint {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_rintl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.rint.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float rintf(float);
+double rint(double);
+long double rintl(long double);
+
+float call_rintf(float f) {
+  return rintf(f);
+  // CHECK: cir.func dso_local @call_rintf
+  // CHECK: {{.+}} = cir.rint {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_rintf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.rint.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_rint(double f) {
+  return rint(f);
+  // CHECK: cir.func dso_local @call_rint
+  // CHECK: {{.+}} = cir.rint {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_rint(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.rint.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_rintl(long double f) {
+  return rintl(f);
+  // CHECK: cir.func dso_local @call_rintl
+  // CHECK: {{.+}} = cir.rint {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.rint {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_rintl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.rint.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// round
+
+float my_roundf(float f) {
+  return __builtin_roundf(f);
+  // CHECK: cir.func dso_local @my_roundf
+  // CHECK: {{.+}} = cir.round {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_roundf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.round.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_round(double f) {
+  return __builtin_round(f);
+  // CHECK: cir.func dso_local @my_round
+  // CHECK: {{.+}} = cir.round {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_round(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.round.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_roundl(long double f) {
+  return __builtin_roundl(f);
+  // CHECK: cir.func dso_local @my_roundl
+  // CHECK: {{.+}} = cir.round {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.round {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_roundl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.round.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float roundf(float);
+double round(double);
+long double roundl(long double);
+
+float call_roundf(float f) {
+  return roundf(f);
+  // CHECK: cir.func dso_local @call_roundf
+  // CHECK: {{.+}} = cir.round {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_roundf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.round.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_round(double f) {
+  return round(f);
+  // CHECK: cir.func dso_local @call_round
+  // CHECK: {{.+}} = cir.round {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_round(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.round.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_roundl(long double f) {
+  return roundl(f);
+  // CHECK: cir.func dso_local @call_roundl
+  // CHECK: {{.+}} = cir.round {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.round {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_roundl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.round.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// sin
+
+float my_sinf(float f) {
+  return __builtin_sinf(f);
+  // CHECK: cir.func dso_local @my_sinf
+  // CHECK: {{.+}} = cir.sin {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_sinf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.sin.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_sin(double f) {
+  return __builtin_sin(f);
+  // CHECK: cir.func dso_local @my_sin
+  // CHECK: {{.+}} = cir.sin {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_sin(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.sin.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_sinl(long double f) {
+  return __builtin_sinl(f);
+  // CHECK: cir.func dso_local @my_sinl
+  // CHECK: {{.+}} = cir.sin {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.sin {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_sinl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.sin.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float sinf(float);
+double sin(double);
+long double sinl(long double);
+
+float call_sinf(float f) {
+  return sinf(f);
+  // CHECK: cir.func dso_local @call_sinf
+  // CHECK: {{.+}} = cir.sin {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_sinf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.sin.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_sin(double f) {
+  return sin(f);
+  // CHECK: cir.func dso_local @call_sin
+  // CHECK: {{.+}} = cir.sin {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_sin(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.sin.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_sinl(long double f) {
+  return sinl(f);
+  // CHECK: cir.func dso_local @call_sinl
+  // CHECK: {{.+}} = cir.sin {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.sin {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_sinl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.sin.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// sqrt
+
+float my_sqrtf(float f) {
+  return __builtin_sqrtf(f);
+  // CHECK: cir.func dso_local @my_sqrtf
+  // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_sqrtf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.sqrt.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_sqrt(double f) {
+  return __builtin_sqrt(f);
+  // CHECK: cir.func dso_local @my_sqrt
+  // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_sqrt(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.sqrt.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_sqrtl(long double f) {
+  return __builtin_sqrtl(f);
+  // CHECK: cir.func dso_local @my_sqrtl
+  // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.sqrt {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_sqrtl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.sqrt.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float sqrtf(float);
+double sqrt(double);
+long double sqrtl(long double);
+
+float call_sqrtf(float f) {
+  return sqrtf(f);
+  // CHECK: cir.func dso_local @call_sqrtf
+  // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_sqrtf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.sqrt.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_sqrt(double f) {
+  return sqrt(f);
+  // CHECK: cir.func dso_local @call_sqrt
+  // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_sqrt(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.sqrt.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_sqrtl(long double f) {
+  return sqrtl(f);
+  // CHECK: cir.func dso_local @call_sqrtl
+  // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.sqrt {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_sqrtl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.sqrt.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// tan
+
+float my_tanf(float f) {
+  return __builtin_tanf(f);
+  // CHECK: cir.func dso_local @my_tanf
+  // CHECK: {{.+}} = cir.tan {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_tanf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.tan.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_tan(double f) {
+  return __builtin_tan(f);
+  // CHECK: cir.func dso_local @my_tan
+  // CHECK: {{.+}} = cir.tan {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_tan(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.tan.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_tanl(long double f) {
+  return __builtin_tanl(f);
+  // CHECK: cir.func dso_local @my_tanl
+  // CHECK: {{.+}} = cir.tan {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.tan {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_tanl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.tan.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float tanf(float);
+double tan(double);
+long double tanl(long double);
+
+float call_tanf(float f) {
+  return tanf(f);
+  // CHECK: cir.func dso_local @call_tanf
+  // CHECK: {{.+}} = cir.tan {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_tanf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.tan.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_tan(double f) {
+  return tan(f);
+  // CHECK: cir.func dso_local @call_tan
+  // CHECK: {{.+}} = cir.tan {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_tan(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.tan.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_tanl(long double f) {
+  return tanl(f);
+  // CHECK: cir.func dso_local @call_tanl
+  // CHECK: {{.+}} = cir.tan {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.tan {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_tanl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.tan.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// trunc
+
+float my_truncf(float f) {
+  return __builtin_truncf(f);
+  // CHECK: cir.func dso_local @my_truncf
+  // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_truncf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.trunc.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_trunc(double f) {
+  return __builtin_trunc(f);
+  // CHECK: cir.func dso_local @my_trunc
+  // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_trunc(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.trunc.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_truncl(long double f) {
+  return __builtin_truncl(f);
+  // CHECK: cir.func dso_local @my_truncl
+  // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.trunc {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_truncl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.trunc.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float truncf(float);
+double trunc(double);
+long double truncl(long double);
+
+float call_truncf(float f) {
+  return truncf(f);
+  // CHECK: cir.func dso_local @call_truncf
+  // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_truncf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.trunc.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_trunc(double f) {
+  return trunc(f);
+  // CHECK: cir.func dso_local @call_trunc
+  // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_trunc(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.trunc.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_truncl(long double f) {
+  return truncl(f);
+  // CHECK: cir.func dso_local @call_truncl
+  // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.trunc {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_truncl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.trunc.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// copysign
+
+float my_copysignf(float x, float y) {
+  return __builtin_copysignf(x, y);
+  // CHECK: cir.func dso_local @my_copysignf
+  // CHECK:   %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_copysignf
+  // LLVM:   %{{.+}} = call float @llvm.copysign.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double my_copysign(double x, double y) {
+  return __builtin_copysign(x, y);
+  // CHECK: cir.func dso_local @my_copysign
+  // CHECK:   %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_copysign
+  // LLVM:   %{{.+}} = call double @llvm.copysign.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double my_copysignl(long double x, long double y) {
+  return __builtin_copysignl(x, y);
+  // CHECK: cir.func dso_local @my_copysignl
+  // CHECK:   %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_copysignl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.copysign.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float copysignf(float, float);
+double copysign(double, double);
+long double copysignl(long double, long double);
+
+float call_copysignf(float x, float y) {
+  return copysignf(x, y);
+  // CHECK: cir.func dso_local @call_copysignf
+  // CHECK:   %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_copysignf
+  // LLVM:   %{{.+}} = call float @llvm.copysign.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double call_copysign(double x, double y) {
+  return copysign(x, y);
+  // CHECK: cir.func dso_local @call_copysign
+  // CHECK:   %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_copysign
+  // LLVM:   %{{.+}} = call double @llvm.copysign.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double call_copysignl(long double x, long double y) {
+  return copysignl(x, y);
+  // CHECK: cir.func dso_local @call_copysignl
+  // CHECK:   %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_copysignl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.copysign.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// fmax
+
+float my_fmaxf(float x, float y) {
+  return __builtin_fmaxf(x, y);
+  // CHECK: cir.func dso_local @my_fmaxf
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_fmaxf
+  // LLVM:   %{{.+}} = call float @llvm.maxnum.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double my_fmax(double x, double y) {
+  return __builtin_fmax(x, y);
+  // CHECK: cir.func dso_local @my_fmax
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_fmax
+  // LLVM:   %{{.+}} = call double @llvm.maxnum.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double my_fmaxl(long double x, long double y) {
+  return __builtin_fmaxl(x, y);
+  // CHECK: cir.func dso_local @my_fmaxl
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_fmaxl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float fmaxf(float, float);
+double fmax(double, double);
+long double fmaxl(long double, long double);
+
+float call_fmaxf(float x, float y) {
+  return fmaxf(x, y);
+  // CHECK: cir.func dso_local @call_fmaxf
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_fmaxf
+  // LLVM:   %{{.+}} = call float @llvm.maxnum.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double call_fmax(double x, double y) {
+  return fmax(x, y);
+  // CHECK: cir.func dso_local @call_fmax
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_fmax
+  // LLVM:   %{{.+}} = call double @llvm.maxnum.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double call_fmaxl(long double x, long double y) {
+  return fmaxl(x, y);
+  // CHECK: cir.func dso_local @call_fmaxl
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_fmaxl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// fmin
+
+float my_fminf(float x, float y) {
+  return __builtin_fminf(x, y);
+  // CHECK: cir.func dso_local @my_fminf
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_fminf
+  // LLVM:   %{{.+}} = call float @llvm.minnum.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double my_fmin(double x, double y) {
+  return __builtin_fmin(x, y);
+  // CHECK: cir.func dso_local @my_fmin
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_fmin
+  // LLVM:   %{{.+}} = call double @llvm.minnum.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double my_fminl(long double x, long double y) {
+  return __builtin_fminl(x, y);
+  // CHECK: cir.func dso_local @my_fminl
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_fminl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.minnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float fminf(float, float);
+double fmin(double, double);
+long double fminl(long double, long double);
+
+float call_fminf(float x, float y) {
+  return fminf(x, y);
+  // CHECK: cir.func dso_local @call_fminf
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_fminf
+  // LLVM:   %{{.+}} = call float @llvm.minnum.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double call_fmin(double x, double y) {
+  return fmin(x, y);
+  // CHECK: cir.func dso_local @call_fmin
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_fmin
+  // LLVM:   %{{.+}} = call double @llvm.minnum.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double call_fminl(long double x, long double y) {
+  return fminl(x, y);
+  // CHECK: cir.func dso_local @call_fminl
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_fminl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.minnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// fmod
+
+float my_fmodf(float x, float y) {
+  return __builtin_fmodf(x, y);
+  // CHECK: cir.func dso_local @my_fmodf
+  // CHECK:   %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_fmodf
+  // LLVM:   %{{.+}} = frem float %{{.+}}, %{{.+}}
+  // LLVM: }
+}
+
+double my_fmod(double x, double y) {
+  return __builtin_fmod(x, y);
+  // CHECK: cir.func dso_local @my_fmod
+  // CHECK:   %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_fmod
+  // LLVM:   %{{.+}} = frem double %{{.+}}, %{{.+}}
+  // LLVM: }
+}
+
+long double my_fmodl(long double x, long double y) {
+  return __builtin_fmodl(x, y);
+  // CHECK: cir.func dso_local @my_fmodl
+  // CHECK:   %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_fmodl
+  // LLVM:   %{{.+}} = frem x86_fp80 %{{.+}}, %{{.+}}
+  // LLVM: }
+}
+
+float fmodf(float, float);
+double fmod(double, double);
+long double fmodl(long double, long double);
+
+float call_fmodf(float x, float y) {
+  return fmodf(x, y);
+  // CHECK: cir.func dso_local @call_fmodf
+  // CHECK:   %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_fmodf
+  // LLVM:   %{{.+}} = frem float %{{.+}}, %{{.+}}
+  // LLVM: }
+}
+
+double call_fmod(double x, double y) {
+  return fmod(x, y);
+  // CHECK: cir.func dso_local @call_fmod
+  // CHECK:   %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_fmod
+  // LLVM:   %{{.+}} = frem double %{{.+}}, %{{.+}}
+  // LLVM: }
+}
+
+long double call_fmodl(long double x, long double y) {
+  return fmodl(x, y);
+  // CHECK: cir.func dso_local @call_fmodl
+  // CHECK:   %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_fmodl
+  // LLVM:   %{{.+}} = frem x86_fp80 %{{.+}}, %{{.+}}
+  // LLVM: }
+}
+
+// pow
+
+float my_powf(float x, float y) {
+  return __builtin_powf(x, y);
+  // CHECK: cir.func dso_local @my_powf
+  // CHECK:   %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_powf
+  // LLVM:   %{{.+}} = call float @llvm.pow.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double my_pow(double x, double y) {
+  return __builtin_pow(x, y);
+  // CHECK: cir.func dso_local @my_pow
+  // CHECK:   %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_pow
+  // LLVM:   %{{.+}} = call double @llvm.pow.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double my_powl(long double x, long double y) {
+  return __builtin_powl(x, y);
+  // CHECK: cir.func dso_local @my_powl
+  // CHECK:   %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_powl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.pow.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float powf(float, float);
+double pow(double, double);
+long double powl(long double, long double);
+
+float call_powf(float x, float y) {
+  return powf(x, y);
+  // CHECK: cir.func dso_local @call_powf
+  // CHECK:   %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_powf
+  // LLVM:   %{{.+}} = call float @llvm.pow.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double call_pow(double x, double y) {
+  return pow(x, y);
+  // CHECK: cir.func dso_local @call_pow
+  // CHECK:   %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_pow
+  // LLVM:   %{{.+}} = call double @llvm.pow.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double call_powl(long double x, long double y) {
+  return powl(x, y);
+  // CHECK: cir.func dso_local @call_powl
+  // CHECK:   %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_powl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.pow.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
diff --git a/clang/test/CIR/CodeGen/libc.c b/clang/test/CIR/CodeGen/libc.c
index f65fe92cd36a0..d4b61dab7bc42 100644
--- a/clang/test/CIR/CodeGen/libc.c
+++ b/clang/test/CIR/CodeGen/libc.c
@@ -27,13 +27,13 @@ void testMemset(void *dst, int val, unsigned long size) {
 double fabs(double);
 double testFabs(double x) {
   return fabs(x);
-  // CHECK: cir.call @fabs
+  // CHECK: cir.fabs %{{.+}} : !cir.double
 }
 
 float fabsf(float);
 float testFabsf(float x) {
   return fabsf(x);
-  // CHECK: cir.call @fabsf
+  // CHECK: cir.fabs %{{.+}} : !cir.float
 }
 
 int abs(int);
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 900a1b80a0f4d..82b5d6a25cc59 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -8,7 +8,7 @@
 #include <immintrin.h>
 
 __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
-  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
+  // CIR-LABEL: cir.func dso_local @test_vcvtph2ps_mask
   // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
   // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.int<s, 1>>
@@ -26,7 +26,7 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
 }
 
 __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
-  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
+  // CIR-LABEL: cir.func dso_local @test_vcvtph2ps256_mask
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
   // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
 
@@ -42,7 +42,7 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
 }
 
 __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
-  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
+  // CIR-LABEL: cir.func dso_local @test_vcvtph2ps512_mask
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
   // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
 
@@ -58,7 +58,7 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
 }
 
 __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
-  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
+  // CIR-LABEL: cir.func dso_local @test_vcvtph2ps_maskz
   // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
   // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.int<s, 1>>
@@ -76,7 +76,7 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
 }
 
 __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
-  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
+  // CIR-LABEL: cir.func dso_local @test_vcvtph2ps256_maskz
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
   // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
 
@@ -92,7 +92,7 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
 }
 
 __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
-  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
+  // CIR-LABEL: cir.func dso_local @test_vcvtph2ps512_maskz
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
   // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
 
@@ -108,7 +108,7 @@ __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
 }
 
 __m512 test_mm512_cvt_roundph_ps(__m256i a) {
-  // CIR-LABEL: cir.func no_inline dso_local @test_mm512_cvt_roundph_ps
+  // CIR-LABEL: cir.func dso_local @test_mm512_cvt_roundph_ps
   // CIR: %{{.*}} = cir.call_llvm_intrinsic "x86.avx512.mask.vcvtph2ps.512" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s16i>, !cir.vector<16 x !cir.float>, !u16i, !s32i) -> !cir.vector<16 x !cir.float>
 
   // LLVM-LABEL: @test_mm512_cvt_roundph_ps
@@ -121,7 +121,7 @@ __m512 test_mm512_cvt_roundph_ps(__m256i a) {
 }
 
 __m512 test_mm512_mask_cvt_roundph_ps(__m512 w, __mmask16 u, __m256i a) {
-  // CIR-LABEL: cir.func no_inline dso_local @test_mm512_mask_cvt_roundph_ps
+  // CIR-LABEL: cir.func dso_local @test_mm512_mask_cvt_roundph_ps
   // CIR: %{{.*}} = cir.call_llvm_intrinsic "x86.avx512.mask.vcvtph2ps.512" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s16i>, !cir.vector<16 x !cir.float>, !u16i, !s32i) -> !cir.vector<16 x !cir.float>
 
   // LLVM-LABEL: @test_mm512_mask_cvt_roundph_ps
@@ -134,7 +134,7 @@ __m512 test_mm512_mask_cvt_roundph_ps(__m512 w, __mmask16 u, __m256i a) {
 }
 
 __m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 u, __m256i a) {
-  // CIR-LABEL: cir.func no_inline dso_local @test_mm512_maskz_cvt_roundph_ps
+  // CIR-LABEL: cir.func dso_local @test_mm512_maskz_cvt_roundph_ps
   // CIR: %{{.*}} = cir.call_llvm_intrinsic "x86.avx512.mask.vcvtph2ps.512" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s16i>, !cir.vector<16 x !cir.float>, !u16i, !s32i) -> !cir.vector<16 x !cir.float>
 
   // LLVM-LABEL: @test_mm512_maskz_cvt_roundph_ps
diff --git a/clang/test/CIR/CodeGenBuiltins/builtin-fcmp-sse.c b/clang/test/CIR/CodeGenBuiltins/builtin-fcmp-sse.c
index 35abd1b57ecb0..a008bbfba50da 100644
--- a/clang/test/CIR/CodeGenBuiltins/builtin-fcmp-sse.c
+++ b/clang/test/CIR/CodeGenBuiltins/builtin-fcmp-sse.c
@@ -9,7 +9,7 @@ typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
 typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
 
 __m128 test_cmpnleps(__m128 A, __m128 B) {
-  // CIR-LABEL:   cir.func no_inline dso_local @test_cmpnleps(
+  // CIR-LABEL:   cir.func dso_local @test_cmpnleps(
   // CIR:           %[[ARG0:.*]]: !cir.vector<4 x !cir.float> {{.*}}, %[[ARG1:.*]]: !cir.vector<4 x !cir.float> {{.*}}) -> !cir.vector<4 x !cir.float> {
   // CIR:           %[[ALLOCA_0:.*]] = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["A", init] {alignment = 16 : i64}
   // CIR:           %[[ALLOCA_1:.*]] = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["B", init] {alignment = 16 : i64}
@@ -27,7 +27,7 @@ __m128 test_cmpnleps(__m128 A, __m128 B) {
   // CIR:         } 
 
   // LLVM-LABEL: define dso_local <4 x float> @test_cmpnleps(
-  // LLVM-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+  // LLVM-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) {
   // LLVM-NEXT:    [[TMP3:%.*]] = alloca <4 x float>, i64 1, align 16
   // LLVM-NEXT:    [[TMP4:%.*]] = alloca <4 x float>, i64 1, align 16
   // LLVM-NEXT:    [[TMP5:%.*]] = alloca <4 x float>, i64 1, align 16
@@ -44,7 +44,7 @@ __m128 test_cmpnleps(__m128 A, __m128 B) {
   // LLVM-NEXT:    ret <4 x float> [[TMP12]]
 
   // OGCG-LABEL: define dso_local <4 x float> @test_cmpnleps(
-  // OGCG-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+  // OGCG-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #{{[0-9]+}} {
   // OGCG-NEXT:  [[ENTRY:.*:]]
   // OGCG-NEXT:    [[A_ADDR:%.*]] = alloca <4 x float>, align 16
   // OGCG-NEXT:    [[B_ADDR:%.*]] = alloca <4 x float>, align 16
@@ -60,7 +60,7 @@ __m128 test_cmpnleps(__m128 A, __m128 B) {
 }
 
 __m128d test_cmpnlepd(__m128d A, __m128d B) {
-  // CIR-LABEL:   cir.func no_inline dso_local @test_cmpnlepd(
+  // CIR-LABEL:   cir.func dso_local @test_cmpnlepd(
   // CIR:           %[[ARG0:.*]]: !cir.vector<2 x !cir.double> {{.*}}, %[[ARG1:.*]]: !cir.vector<2 x !cir.double> {{.*}}) -> !cir.vector<2 x !cir.double> {
   // CIR:           %[[ALLOCA_0:.*]] = cir.alloca !cir.vector<2 x !cir.double>, !cir.ptr<!cir.vector<2 x !cir.double>>, ["A", init] {alignment = 16 : i64} 
   // CIR:           %[[ALLOCA_1:.*]] = cir.alloca !cir.vector<2 x !cir.double>, !cir.ptr<!cir.vector<2 x !cir.double>>, ["B", init] {alignment = 16 : i64} 
@@ -78,7 +78,7 @@ __m128d test_cmpnlepd(__m128d A, __m128d B) {
   // CIR:         } 
 
   // LLVM-LABEL: define dso_local <2 x double> @test_cmpnlepd(
-  // LLVM-SAME: <2 x double> [[TMP0:%.*]], <2 x double> [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+  // LLVM-SAME: <2 x double> [[TMP0:%.*]], <2 x double> [[TMP1:%.*]]) {
   // LLVM-NEXT:    [[TMP3:%.*]] = alloca <2 x double>, i64 1, align 16
   // LLVM-NEXT:    [[TMP4:%.*]] = alloca <2 x double>, i64 1, align 16
   // LLVM-NEXT:    [[TMP5:%.*]] = alloca <2 x double>, i64 1, align 16
@@ -95,7 +95,7 @@ __m128d test_cmpnlepd(__m128d A, __m128d B) {
   // LLVM-NEXT:    ret <2 x double> [[TMP12]]
 
   // OGCG-LABEL: define dso_local <2 x double> @test_cmpnlepd(
-  // OGCG-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+  // OGCG-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #{{[0-9]+}} {
   // OGCG-NEXT:  [[ENTRY:.*:]]
   // OGCG-NEXT:    [[A_ADDR:%.*]] = alloca <2 x double>, align 16
   // OGCG-NEXT:    [[B_ADDR:%.*]] = alloca <2 x double>, align 16
@@ -111,7 +111,7 @@ __m128d test_cmpnlepd(__m128d A, __m128d B) {
 }
 
 __m128 test_cmpnltps(__m128 A, __m128 B) {
-  // CIR-LABEL:   cir.func no_inline dso_local @test_cmpnltps(
+  // CIR-LABEL:   cir.func dso_local @test_cmpnltps(
   // CIR:           %[[ARG0:.*]]: !cir.vector<4 x !cir.float> {{.*}}, %[[ARG1:.*]]: !cir.vector<4 x !cir.float> {{.*}}) -> !cir.vector<4 x !cir.float> {
   // CIR:           %[[ALLOCA_0:.*]] = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["A", init] {alignment = 16 : i64} 
   // CIR:           %[[ALLOCA_1:.*]] = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["B", init] {alignment = 16 : i64} 
@@ -129,7 +129,7 @@ __m128 test_cmpnltps(__m128 A, __m128 B) {
   // CIR:         } 
 
   // LLVM-LABEL: define dso_local <4 x float> @test_cmpnltps(
-  // LLVM-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+  // LLVM-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) {
   // LLVM-NEXT:    [[TMP3:%.*]] = alloca <4 x float>, i64 1, align 16
   // LLVM-NEXT:    [[TMP4:%.*]] = alloca <4 x float>, i64 1, align 16
   // LLVM-NEXT:    [[TMP5:%.*]] = alloca <4 x float>, i64 1, align 16
@@ -146,7 +146,7 @@ __m128 test_cmpnltps(__m128 A, __m128 B) {
   // LLVM-NEXT:    ret <4 x float> [[TMP12]]
 
   // OGCG-LABEL: define dso_local <4 x float> @test_cmpnltps(
-  // OGCG-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+  // OGCG-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #{{[0-9]+}} {
   // OGCG-NEXT:  [[ENTRY:.*:]]
   // OGCG-NEXT:    [[A_ADDR:%.*]] = alloca <4 x float>, align 16
   // OGCG-NEXT:    [[B_ADDR:%.*]] = alloca <4 x float>, align 16
@@ -162,7 +162,7 @@ __m128 test_cmpnltps(__m128 A, __m128 B) {
 }
 
 __m128d test_cmpnltpd(__m128d A, __m128d B) {
-  // CIR-LABEL:   cir.func no_inline dso_local @test_cmpnltpd(
+  // CIR-LABEL:   cir.func dso_local @test_cmpnltpd(
   // CIR:           %[[ARG0:.*]]: !cir.vector<2 x !cir.double> {{.*}}, %[[ARG1:.*]]: !cir.vector<2 x !cir.double> {{.*}}) -> !cir.vector<2 x !cir.double> {
   // CIR:           %[[ALLOCA_0:.*]] = cir.alloca !cir.vector<2 x !cir.double>, !cir.ptr<!cir.vector<2 x !cir.double>>, ["A", init] {alignment = 16 : i64} 
   // CIR:           %[[ALLOCA_1:.*]] = cir.alloca !cir.vector<2 x !cir.double>, !cir.ptr<!cir.vector<2 x !cir.double>>, ["B", init] {alignment = 16 : i64} 
@@ -180,7 +180,7 @@ __m128d test_cmpnltpd(__m128d A, __m128d B) {
   // CIR:         } 
 
   // LLVM-LABEL: define dso_local <2 x double> @test_cmpnltpd(
-  // LLVM-SAME: <2 x double> [[TMP0:%.*]], <2 x double> [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+  // LLVM-SAME: <2 x double> [[TMP0:%.*]], <2 x double> [[TMP1:%.*]]) {
   // LLVM-NEXT:    [[TMP3:%.*]] = alloca <2 x double>, i64 1, align 16
   // LLVM-NEXT:    [[TMP4:%.*]] = alloca <2 x double>, i64 1, align 16
   // LLVM-NEXT:    [[TMP5:%.*]] = alloca <2 x double>, i64 1, align 16
@@ -197,7 +197,7 @@ __m128d test_cmpnltpd(__m128d A, __m128d B) {
   // LLVM-NEXT:    ret <2 x double> [[TMP12]]
 
   // OGCG-LABEL: define dso_local <2 x double> @test_cmpnltpd(
-  // OGCG-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+  // OGCG-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #{{[0-9]+}} {
   // OGCG-NEXT:  [[ENTRY:.*:]]
   // OGCG-NEXT:    [[A_ADDR:%.*]] = alloca <2 x double>, align 16
   // OGCG-NEXT:    [[B_ADDR:%.*]] = alloca <2 x double>, align 16
diff --git a/clang/test/CIR/CodeGenBuiltins/builtin-isfpclass.c b/clang/test/CIR/CodeGenBuiltins/builtin-isfpclass.c
index 16d82c905f445..a8dea4dfb7d77 100644
--- a/clang/test/CIR/CodeGenBuiltins/builtin-isfpclass.c
+++ b/clang/test/CIR/CodeGenBuiltins/builtin-isfpclass.c
@@ -40,8 +40,8 @@ void test_is_finite(__fp16 *H, float F, double D, long double LD) {
     // OGCG: call i1 @llvm.is.fpclass.f32(float {{.*}}, i32 504)
 
     res = finite(D);
-    // CIR: cir.call @finite(%{{.*}}) nothrow side_effect(const) : (!cir.double) -> !s32i
-    // LLVM: call i32 @finite(double {{.*}})
+    // CIR: cir.is_fp_class %{{.*}}, fcFinite : (!cir.double) -> !cir.bool
+    // LLVM: call i1 @llvm.is.fpclass.f64(double {{.*}}, i32 504)
     // OGCG: call i1 @llvm.is.fpclass.f64(double %20, i32 504)
     res = __builtin_isnormal(*H);
     // CIR: cir.is_fp_class %{{.*}}, fcNormal : (!cir.f16) -> !cir.bool

>From 0022b1513a69d1a7e6ebcd5779c79223066c952b Mon Sep 17 00:00:00 2001
From: Adam Smith <adams at nvidia.com>
Date: Wed, 7 Jan 2026 12:38:59 -0800
Subject: [PATCH 2/6] [CIR] Add inverse trig, atan2, and roundeven intrinsics

This patch adds support for additional floating-point math intrinsics
that were missing from the initial implementation.

New CIR Operations (CIROps.td):
- CIR_RoundEvenOp: Rounds to nearest integer with ties to even
- CIR_FMaximumOp: IEEE 754-2019 maximum (propagates NaN)
- CIR_FMinimumOp: IEEE 754-2019 minimum (propagates NaN)
- CIR_ATan2Op: Two-argument arctangent

CIRGenBuiltin.cpp changes:
- Implement acos/acosf/acosl -> cir.acos (using existing ACosOp)
- Implement asin/asinf/asinl -> cir.asin (using existing ASinOp)
- Implement atan/atanf/atanl -> cir.atan (using existing ATanOp)
- Implement atan2/atan2f/atan2l -> cir.atan2 (new ATan2Op)
- Implement roundeven/roundevenf/roundevenl -> cir.roundeven (new RoundEvenOp)
- Previously these returned RValue::getIgnored() as NYI placeholders

LowerToLLVM.cpp changes:
- Add LLVM lowering for RoundEvenOp -> llvm.roundeven
- Add LLVM lowering for FMaximumOp -> llvm.maximum
- Add LLVM lowering for FMinimumOp -> llvm.minimum
- Add LLVM lowering for ATan2Op -> llvm.atan2

Test updates:
- builtin-floating-point.c: Add tests for acos, asin, atan, atan2,
  and roundeven builtins with CIR and LLVM checks
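
For quick reference, a minimal end-to-end sketch of the mapping these
lowerings enable (illustrative only; the function name is made up, and the
authoritative FileCheck coverage is in the builtin-floating-point.c diff
below):

  // Each builtin is emitted as the listed CIR op and then lowered to the
  // corresponding LLVM intrinsic.
  double demo(double y, double x) {
    double a = __builtin_atan2(y, x);  // cir.atan2     -> @llvm.atan2.f64
    return __builtin_roundeven(a);     // cir.roundeven -> @llvm.roundeven.f64
  }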
---
 clang/include/clang/CIR/Dialect/IR/CIROps.td  |  13 +++
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp       |   7 +-
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp |  36 ++++++
 .../test/CIR/CodeGen/builtin-floating-point.c | 110 ++++++++++++++++++
 4 files changed, 164 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 9c6a764d4d85f..7da882906d797 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -5216,6 +5216,16 @@ def CIR_RoundOp : CIR_UnaryFPToFPBuiltinOp<"round", "RoundOp"> {
   }];
 }
 
+def CIR_RoundEvenOp : CIR_UnaryFPToFPBuiltinOp<"roundeven", "RoundEvenOp"> {
+  let summary = "Rounds floating-point value to nearest integer, ties to even";
+  let description = [{
+    `cir.roundeven` rounds a floating-point operand to the nearest integer,
+    with ties to even (banker's rounding): 1.5 and 2.5 both round to 2.0.
+
+    Floating-point exceptions are ignored, and it does not set `errno`.
+  }];
+}
+
 def CIR_SinOp : CIR_UnaryFPToFPBuiltinOp<"sin", "SinOp"> {
   let summary = "Computes the floating-point sine";
   let description = [{
@@ -5318,9 +5328,12 @@ class CIR_BinaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
 
 def CIR_CopysignOp : CIR_BinaryFPToFPBuiltinOp<"copysign", "CopySignOp">;
 def CIR_FMaxNumOp : CIR_BinaryFPToFPBuiltinOp<"fmaxnum", "MaxNumOp">;
+def CIR_FMaximumOp : CIR_BinaryFPToFPBuiltinOp<"fmaximum", "MaximumOp">;
 def CIR_FMinNumOp : CIR_BinaryFPToFPBuiltinOp<"fminnum", "MinNumOp">;
+def CIR_FMinimumOp : CIR_BinaryFPToFPBuiltinOp<"fminimum", "MinimumOp">;
 def CIR_FModOp : CIR_BinaryFPToFPBuiltinOp<"fmod", "FRemOp">;
 def CIR_PowOp : CIR_BinaryFPToFPBuiltinOp<"pow", "PowOp">;
+def CIR_ATan2Op : CIR_BinaryFPToFPBuiltinOp<"atan2", "ATan2Op">;
 
 //===----------------------------------------------------------------------===//
 // Variadic Operations
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 74aecbbb56f6e..7d7031f21da22 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -302,6 +302,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_acosl:
   case Builtin::BI__builtin_acosf128:
   case Builtin::BI__builtin_elementwise_acos:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::ACosOp>(cgf, *e);
   case Builtin::BIasin:
   case Builtin::BIasinf:
   case Builtin::BIasinl:
@@ -311,6 +312,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_asinl:
   case Builtin::BI__builtin_asinf128:
   case Builtin::BI__builtin_elementwise_asin:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::ASinOp>(cgf, *e);
   case Builtin::BIatan:
   case Builtin::BIatanf:
   case Builtin::BIatanl:
@@ -320,6 +322,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_atanl:
   case Builtin::BI__builtin_atanf128:
   case Builtin::BI__builtin_elementwise_atan:
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::ATanOp>(cgf, *e);
   case Builtin::BIatan2:
   case Builtin::BIatan2f:
   case Builtin::BIatan2l:
@@ -329,7 +332,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_atan2l:
   case Builtin::BI__builtin_atan2f128:
   case Builtin::BI__builtin_elementwise_atan2:
-    return RValue::getIgnored();
+    return emitBinaryFPBuiltin<cir::ATan2Op>(cgf, *e);
   case Builtin::BIceil:
   case Builtin::BIceilf:
   case Builtin::BIceill:
@@ -555,7 +558,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_roundevenl:
   case Builtin::BI__builtin_roundevenf128:
   case Builtin::BI__builtin_elementwise_roundeven:
-    return RValue::getIgnored();
+    return emitUnaryMaybeConstrainedFPBuiltin<cir::RoundEvenOp>(cgf, *e);
   case Builtin::BIsin:
   case Builtin::BIsinf:
   case Builtin::BIsinl:
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 309565b3b46ec..453323d3cd36b 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -267,6 +267,15 @@ mlir::LogicalResult CIRToLLVMRoundOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+mlir::LogicalResult CIRToLLVMRoundEvenOpLowering::matchAndRewrite(
+    cir::RoundEvenOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::RoundEvenOp>(op, resTy,
+                                                       adaptor.getSrc());
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMSinOpLowering::matchAndRewrite(
     cir::SinOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -1645,6 +1654,24 @@ mlir::LogicalResult CIRToLLVMFMinNumOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+mlir::LogicalResult CIRToLLVMFMaximumOpLowering::matchAndRewrite(
+    cir::FMaximumOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::MaximumOp>(
+      op, resTy, adaptor.getLhs(), adaptor.getRhs());
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMFMinimumOpLowering::matchAndRewrite(
+    cir::FMinimumOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::MinimumOp>(
+      op, resTy, adaptor.getLhs(), adaptor.getRhs());
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMFModOpLowering::matchAndRewrite(
     cir::FModOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -1663,6 +1690,15 @@ mlir::LogicalResult CIRToLLVMPowOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+mlir::LogicalResult CIRToLLVMATan2OpLowering::matchAndRewrite(
+    cir::ATan2Op op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  rewriter.replaceOpWithNewOp<mlir::LLVM::ATan2Op>(op, resTy, adaptor.getLhs(),
+                                                   adaptor.getRhs());
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMLroundOpLowering::matchAndRewrite(
     cir::LroundOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
diff --git a/clang/test/CIR/CodeGen/builtin-floating-point.c b/clang/test/CIR/CodeGen/builtin-floating-point.c
index e2c7fddde9408..097e42ef57246 100644
--- a/clang/test/CIR/CodeGen/builtin-floating-point.c
+++ b/clang/test/CIR/CodeGen/builtin-floating-point.c
@@ -1634,3 +1634,113 @@ long double call_powl(long double x, long double y) {
   // LLVM:   %{{.+}} = call x86_fp80 @llvm.pow.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
   // LLVM: }
 }
+
+// acos
+
+float my_acosf(float x) {
+  return __builtin_acosf(x);
+  // CHECK: cir.func dso_local @my_acosf
+  // CHECK:   %{{.+}} = cir.acos %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_acosf
+  // LLVM:   %{{.+}} = call float @llvm.acos.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_acos(double x) {
+  return __builtin_acos(x);
+  // CHECK: cir.func dso_local @my_acos
+  // CHECK:   %{{.+}} = cir.acos %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_acos
+  // LLVM:   %{{.+}} = call double @llvm.acos.f64(double %{{.+}})
+  // LLVM: }
+}
+
+// asin
+
+float my_asinf(float x) {
+  return __builtin_asinf(x);
+  // CHECK: cir.func dso_local @my_asinf
+  // CHECK:   %{{.+}} = cir.asin %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_asinf
+  // LLVM:   %{{.+}} = call float @llvm.asin.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_asin(double x) {
+  return __builtin_asin(x);
+  // CHECK: cir.func dso_local @my_asin
+  // CHECK:   %{{.+}} = cir.asin %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_asin
+  // LLVM:   %{{.+}} = call double @llvm.asin.f64(double %{{.+}})
+  // LLVM: }
+}
+
+// atan
+
+float my_atanf(float x) {
+  return __builtin_atanf(x);
+  // CHECK: cir.func dso_local @my_atanf
+  // CHECK:   %{{.+}} = cir.atan %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_atanf
+  // LLVM:   %{{.+}} = call float @llvm.atan.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_atan(double x) {
+  return __builtin_atan(x);
+  // CHECK: cir.func dso_local @my_atan
+  // CHECK:   %{{.+}} = cir.atan %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_atan
+  // LLVM:   %{{.+}} = call double @llvm.atan.f64(double %{{.+}})
+  // LLVM: }
+}
+
+// atan2
+
+float my_atan2f(float y, float x) {
+  return __builtin_atan2f(y, x);
+  // CHECK: cir.func dso_local @my_atan2f
+  // CHECK:   %{{.+}} = cir.atan2 %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_atan2f
+  // LLVM:   %{{.+}} = call float @llvm.atan2.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double my_atan2(double y, double x) {
+  return __builtin_atan2(y, x);
+  // CHECK: cir.func dso_local @my_atan2
+  // CHECK:   %{{.+}} = cir.atan2 %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_atan2
+  // LLVM:   %{{.+}} = call double @llvm.atan2.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+// roundeven
+
+float my_roundevenf(float x) {
+  return __builtin_roundevenf(x);
+  // CHECK: cir.func dso_local @my_roundevenf
+  // CHECK:   %{{.+}} = cir.roundeven %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_roundevenf
+  // LLVM:   %{{.+}} = call float @llvm.roundeven.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_roundeven(double x) {
+  return __builtin_roundeven(x);
+  // CHECK: cir.func dso_local @my_roundeven
+  // CHECK:   %{{.+}} = cir.roundeven %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_roundeven
+  // LLVM:   %{{.+}} = call double @llvm.roundeven.f64(double %{{.+}})
+  // LLVM: }
+}

>From b4fe587773f0e17c96a66f9eb8becb45b4adfa6d Mon Sep 17 00:00:00 2001
From: Adam Smith <adams at nvidia.com>
Date: Wed, 7 Jan 2026 12:59:50 -0800
Subject: [PATCH 3/6] [CIR] Add elementwise builtin intrinsics support

This commit adds support for elementwise builtin intrinsics in CIR,
migrating functionality from the incubator to upstream.

Changes include:

1. CIROps.td: Added CIR_AbsOp for integer absolute value computation
   - Supports signed integers and vectors of signed integers
   - Includes 'poison' attribute for INT_MIN handling

2. CIRGenBuiltin.cpp: Implemented elementwise builtin emission
   - __builtin_elementwise_abs (integer via AbsOp, FP via FAbsOp)
   - __builtin_elementwise_acos, asin, atan, atan2
   - __builtin_elementwise_exp, exp2
   - __builtin_elementwise_log, log2, log10
   - __builtin_elementwise_cos, sin, tan
   - __builtin_elementwise_floor, round, rint, nearbyint, trunc
   - __builtin_elementwise_sqrt

3. LowerToLLVM.cpp: Added LLVM lowering for AbsOp
   - Uses mlir::LLVM::AbsOp for lowering with poison attribute

4. Test: Added builtins-elementwise.c
   - Comprehensive tests for all implemented elementwise builtins
   - Tests scalar float, double, and vector types (vfloat4, vdouble4)
   - Includes CIR, LLVM, and OGCG checks for verification
   - Updated vector type syntax to match upstream format
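
For item 2, a short sketch of the elementwise_abs dispatch (illustrative
only; the vector typedefs are assumptions modeled on the test's vfloat4
naming): integer element types select the new cir.abs, while floating-point
element types reuse the existing cir.fabs.

  typedef int   vint4   __attribute__((ext_vector_type(4)));
  typedef float vfloat4 __attribute__((ext_vector_type(4)));

  // Integer elements: emitted as cir.abs, lowered to llvm.abs.
  vint4 abs_i(vint4 v) { return __builtin_elementwise_abs(v); }

  // Floating-point elements: emitted as cir.fabs, lowered to llvm.fabs.
  vfloat4 abs_f(vfloat4 v) { return __builtin_elementwise_abs(v); }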
---
 clang/include/clang/CIR/Dialect/IR/CIROps.td  |  28 +
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp       |  47 +-
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp |  11 +
 clang/test/CIR/CodeGen/builtins-elementwise.c | 515 ++++++++++++++++++
 4 files changed, 589 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/builtins-elementwise.c

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 7da882906d797..7335f5ceca9f3 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -5266,6 +5266,34 @@ def CIR_FAbsOp : CIR_UnaryFPToFPBuiltinOp<"fabs", "FAbsOp"> {
   }];
 }
 
+def CIR_AbsOp : CIR_Op<"abs", [Pure, SameOperandsAndResultType]> {
+  let summary = "Computes the absolute value of a signed integer";
+  let description = [{
+    `cir.abs` computes the absolute value of a signed integer or vector
+    of signed integers.
+
+    The `poison` attribute indicates whether the result value is a poison
+    value if the argument is statically or dynamically INT_MIN.
+
+    Example:
+
+    ```mlir
+      %0 = cir.const #cir.int<-42> : !s32i
+      %1 = cir.abs %0 poison : !s32i
+      %3 = cir.abs %2 : !cir.vector<4 x !s32i>
+    ```
+  }];
+
+  let arguments = (ins
+    CIR_AnySIntOrVecOfSIntType:$src,
+    UnitAttr:$poison
+  );
+
+  let results = (outs CIR_AnySIntOrVecOfSIntType:$result);
+
+  let assemblyFormat = "$src ( `poison` $poison^ )? `:` type($src) attr-dict";
+}
+
 def CIR_FloorOp : CIR_UnaryFPToFPBuiltinOp<"floor", "FloorOp"> {
   let summary = "Computes the floating-point floor value";
   let description = [{
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 7d7031f21da22..0d9d12f744450 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1151,8 +1151,17 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
         convertType(e->getType())));
   }
   case Builtin::BI__builtin_nondeterministic_value:
-  case Builtin::BI__builtin_elementwise_abs:
     return errorBuiltinNYI(*this, e, builtinID);
+  case Builtin::BI__builtin_elementwise_abs: {
+    mlir::Type cirTy = convertType(e->getArg(0)->getType());
+    bool isIntTy = cir::isIntOrVectorOfIntType(cirTy);
+    if (!isIntTy)
+      return emitUnaryFPBuiltin<cir::FAbsOp>(*this, *e);
+    mlir::Value arg = emitScalarExpr(e->getArg(0));
+    auto call = builder.create<cir::AbsOp>(
+        getLoc(e->getExprLoc()), arg.getType(), arg, /*poison=*/false);
+    return RValue::get(call->getResult(0));
+  }
   case Builtin::BI__builtin_elementwise_acos:
     return emitUnaryFPBuiltin<cir::ACosOp>(*this, *e);
   case Builtin::BI__builtin_elementwise_asin:
@@ -1160,31 +1169,45 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
   case Builtin::BI__builtin_elementwise_atan:
     return emitUnaryFPBuiltin<cir::ATanOp>(*this, *e);
   case Builtin::BI__builtin_elementwise_atan2:
-  case Builtin::BI__builtin_elementwise_ceil:
+    return emitBinaryFPBuiltin<cir::ATan2Op>(*this, *e);
   case Builtin::BI__builtin_elementwise_exp:
+    return emitUnaryFPBuiltin<cir::ExpOp>(*this, *e);
   case Builtin::BI__builtin_elementwise_exp2:
-  case Builtin::BI__builtin_elementwise_exp10:
-  case Builtin::BI__builtin_elementwise_ldexp:
+    return emitUnaryFPBuiltin<cir::Exp2Op>(*this, *e);
   case Builtin::BI__builtin_elementwise_log:
+    return emitUnaryFPBuiltin<cir::LogOp>(*this, *e);
   case Builtin::BI__builtin_elementwise_log2:
+    return emitUnaryFPBuiltin<cir::Log2Op>(*this, *e);
   case Builtin::BI__builtin_elementwise_log10:
-  case Builtin::BI__builtin_elementwise_pow:
-  case Builtin::BI__builtin_elementwise_bitreverse:
-    return errorBuiltinNYI(*this, e, builtinID);
+    return emitUnaryFPBuiltin<cir::Log10Op>(*this, *e);
   case Builtin::BI__builtin_elementwise_cos:
     return emitUnaryFPBuiltin<cir::CosOp>(*this, *e);
-  case Builtin::BI__builtin_elementwise_cosh:
   case Builtin::BI__builtin_elementwise_floor:
-  case Builtin::BI__builtin_elementwise_popcount:
-  case Builtin::BI__builtin_elementwise_roundeven:
+    return emitUnaryFPBuiltin<cir::FloorOp>(*this, *e);
   case Builtin::BI__builtin_elementwise_round:
+    return emitUnaryFPBuiltin<cir::RoundOp>(*this, *e);
   case Builtin::BI__builtin_elementwise_rint:
+    return emitUnaryFPBuiltin<cir::RintOp>(*this, *e);
   case Builtin::BI__builtin_elementwise_nearbyint:
+    return emitUnaryFPBuiltin<cir::NearbyintOp>(*this, *e);
   case Builtin::BI__builtin_elementwise_sin:
-  case Builtin::BI__builtin_elementwise_sinh:
+    return emitUnaryFPBuiltin<cir::SinOp>(*this, *e);
+  case Builtin::BI__builtin_elementwise_sqrt:
+    return emitUnaryFPBuiltin<cir::SqrtOp>(*this, *e);
   case Builtin::BI__builtin_elementwise_tan:
-  case Builtin::BI__builtin_elementwise_tanh:
+    return emitUnaryFPBuiltin<cir::TanOp>(*this, *e);
   case Builtin::BI__builtin_elementwise_trunc:
+    return emitUnaryFPBuiltin<cir::TruncOp>(*this, *e);
+  case Builtin::BI__builtin_elementwise_ceil:
+  case Builtin::BI__builtin_elementwise_exp10:
+  case Builtin::BI__builtin_elementwise_ldexp:
+  case Builtin::BI__builtin_elementwise_pow:
+  case Builtin::BI__builtin_elementwise_bitreverse:
+  case Builtin::BI__builtin_elementwise_cosh:
+  case Builtin::BI__builtin_elementwise_popcount:
+  case Builtin::BI__builtin_elementwise_roundeven:
+  case Builtin::BI__builtin_elementwise_sinh:
+  case Builtin::BI__builtin_elementwise_tanh:
   case Builtin::BI__builtin_elementwise_canonicalize:
   case Builtin::BI__builtin_elementwise_copysign:
   case Builtin::BI__builtin_elementwise_fma:
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 453323d3cd36b..f2e65fe9f74d8 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -2184,6 +2184,17 @@ mlir::LogicalResult CIRToLLVMFAbsOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+mlir::LogicalResult CIRToLLVMAbsOpLowering::matchAndRewrite(
+    cir::AbsOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type resTy = typeConverter->convertType(op.getType());
+  auto absOp =
+      mlir::LLVM::AbsOp::create(rewriter, op.getLoc(), resTy,
+                                adaptor.getOperands()[0], adaptor.getPoison());
+  rewriter.replaceOp(op, absOp);
+  return mlir::success();
+}
+
 /// Convert the `cir.func` attributes to `llvm.func` attributes.
 /// Only retain those attributes that are not constructed by
 /// `LLVMFuncOp::build`. If `filterArgAttrs` is set, also filter out
diff --git a/clang/test/CIR/CodeGen/builtins-elementwise.c b/clang/test/CIR/CodeGen/builtins-elementwise.c
new file mode 100644
index 0000000000000..6c31ce19788d9
--- /dev/null
+++ b/clang/test/CIR/CodeGen/builtins-elementwise.c
@@ -0,0 +1,515 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -fclangir \
+// RUN:  -emit-llvm  %s -o %t.ll
+// RUN: FileCheck  --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -emit-llvm %s -o %t-ogcg.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-ogcg.ll %s
+
+typedef int vint4 __attribute__((ext_vector_type(4)));
+typedef float vfloat4 __attribute__((ext_vector_type(4)));
+typedef double vdouble4 __attribute__((ext_vector_type(4)));
+
+void test_builtin_elementwise_abs(vint4 vi4, int i, float f, double d,
+                                  vfloat4 vf4, vdouble4  vd4) {
+    // CIR-LABEL: test_builtin_elementwise_abs
+    // LLVM-LABEL: test_builtin_elementwise_abs
+    // OGCG-LABEL: test_builtin_elementwise_abs
+    // CIR: {{%.*}} = cir.fabs {{%.*}} : !cir.float
+    // LLVM: {{%.*}} = call float @llvm.fabs.f32(float {{%.*}})
+    // OGCG: {{%.*}} = call float @llvm.fabs.f32(float {{%.*}})
+    f = __builtin_elementwise_abs(f);
+
+    // CIR: {{%.*}} = cir.fabs {{%.*}} : !cir.double
+    // LLVM: {{%.*}} = call double @llvm.fabs.f64(double {{%.*}})
+    // OGCG: {{%.*}} = call double @llvm.fabs.f64(double {{%.*}})
+    d = __builtin_elementwise_abs(d);
+
+    // CIR: {{%.*}} = cir.abs {{%.*}} : !cir.vector<4 x !s32i>
+    // LLVM: {{%.*}} = call <4 x i32> @llvm.abs.v4i32(<4 x i32> {{%.*}}, i1 false)
+    // OGCG: {{%.*}} = call <4 x i32> @llvm.abs.v4i32(<4 x i32> {{%.*}}, i1 false)
+    vi4 = __builtin_elementwise_abs(vi4);
+
+    // CIR: {{%.*}} = cir.abs {{%.*}} : !s32i
+    // LLVM: {{%.*}} = call i32 @llvm.abs.i32(i32 {{%.*}}, i1 false)
+    // OGCG: {{%.*}} = call i32 @llvm.abs.i32(i32 {{%.*}}, i1 false)
+    i = __builtin_elementwise_abs(i);
+
+    // CIR: {{%.*}} = cir.fabs {{%.*}} : !cir.vector<4 x !cir.float>
+    // LLVM: {{%.*}} = call <4 x float> @llvm.fabs.v4f32(<4 x float> {{%.*}})
+    // OGCG: {{%.*}} = call <4 x float> @llvm.fabs.v4f32(<4 x float> {{%.*}})
+    vf4 = __builtin_elementwise_abs(vf4);
+
+    // CIR: {{%.*}} = cir.fabs {{%.*}} : !cir.vector<4 x !cir.double>
+    // LLVM: {{%.*}} = call <4 x double> @llvm.fabs.v4f64(<4 x double> {{%.*}})
+    // OGCG: {{%.*}} = call <4 x double> @llvm.fabs.v4f64(<4 x double> {{%.*}})
+    vd4 = __builtin_elementwise_abs(vd4);
+}
+
+void test_builtin_elementwise_acos(float f, double d, vfloat4 vf4,
+                                   vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_acos
+  // LLVM-LABEL: test_builtin_elementwise_acos
+  // OGCG-LABEL: test_builtin_elementwise_acos
+  // CIR: {{%.*}} = cir.acos {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.acos.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.acos.f32(float {{%.*}})
+  f = __builtin_elementwise_acos(f);
+
+  // CIR: {{%.*}} = cir.acos {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.acos.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.acos.f64(double {{%.*}})
+  d = __builtin_elementwise_acos(d);
+
+  // CIR: {{%.*}} = cir.acos {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.acos.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.acos.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_acos(vf4);
+
+  // CIR: {{%.*}} = cir.acos {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.acos.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.acos.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_acos(vd4);
+}
+
+void test_builtin_elementwise_asin(float f, double d, vfloat4 vf4,
+  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_asin
+  // LLVM-LABEL: test_builtin_elementwise_asin
+  // OGCG-LABEL: test_builtin_elementwise_asin
+  // CIR: {{%.*}} = cir.asin {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.asin.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.asin.f32(float {{%.*}})
+  f = __builtin_elementwise_asin(f);
+
+  // CIR: {{%.*}} = cir.asin {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.asin.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.asin.f64(double {{%.*}})
+  d = __builtin_elementwise_asin(d);
+
+  // CIR: {{%.*}} = cir.asin {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.asin.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.asin.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_asin(vf4);
+
+  // CIR: {{%.*}} = cir.asin {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.asin.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.asin.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_asin(vd4);
+}
+
+void test_builtin_elementwise_atan(float f, double d, vfloat4 vf4,
+  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_atan
+  // LLVM-LABEL: test_builtin_elementwise_atan
+  // OGCG-LABEL: test_builtin_elementwise_atan
+  // CIR: {{%.*}} = cir.atan {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.atan.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.atan.f32(float {{%.*}})
+  f = __builtin_elementwise_atan(f);
+
+  // CIR: {{%.*}} = cir.atan {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.atan.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.atan.f64(double {{%.*}})
+  d = __builtin_elementwise_atan(d);
+
+  // CIR: {{%.*}} = cir.atan {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.atan.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.atan.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_atan(vf4);
+
+  // CIR: {{%.*}} = cir.atan {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.atan.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.atan.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_atan(vd4);
+}
+
+void test_builtin_elementwise_atan2(float f, double d, vfloat4 vf4,
+  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_atan2
+  // LLVM-LABEL: test_builtin_elementwise_atan2
+  // OGCG-LABEL: test_builtin_elementwise_atan2
+  // CIR: {{%.*}} = cir.atan2 {{%.*}}, {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.atan2.f32(float {{%.*}}, float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.atan2.f32(float {{%.*}}, float {{%.*}})
+  f = __builtin_elementwise_atan2(f, f);
+
+  // CIR: {{%.*}} = cir.atan2 {{%.*}}, {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.atan2.f64(double {{%.*}}, double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.atan2.f64(double {{%.*}}, double {{%.*}})
+  d = __builtin_elementwise_atan2(d, d);
+
+  // CIR: {{%.*}} = cir.atan2 {{%.*}}, {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.atan2.v4f32(<4 x float> {{%.*}}, <4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.atan2.v4f32(<4 x float> {{%.*}}, <4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_atan2(vf4, vf4);
+
+  // CIR: {{%.*}} = cir.atan2 {{%.*}}, {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.atan2.v4f64(<4 x double> {{%.*}}, <4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.atan2.v4f64(<4 x double> {{%.*}}, <4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_atan2(vd4, vd4);
+}
+
+void test_builtin_elementwise_exp(float f, double d, vfloat4 vf4,
+                                  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_exp
+  // LLVM-LABEL: test_builtin_elementwise_exp
+  // OGCG-LABEL: test_builtin_elementwise_exp
+  // CIR: {{%.*}} = cir.exp {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.exp.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.exp.f32(float {{%.*}})
+  f = __builtin_elementwise_exp(f);
+
+  // CIR: {{%.*}} = cir.exp {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.exp.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.exp.f64(double {{%.*}})
+  d = __builtin_elementwise_exp(d);
+
+  // CIR: {{%.*}} = cir.exp {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.exp.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.exp.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_exp(vf4);
+
+  // CIR: {{%.*}} = cir.exp {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.exp.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.exp.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_exp(vd4);
+}
+
+void test_builtin_elementwise_exp2(float f, double d, vfloat4 vf4,
+                                  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_exp2
+  // LLVM-LABEL: test_builtin_elementwise_exp2
+  // OGCG-LABEL: test_builtin_elementwise_exp2
+  // CIR: {{%.*}} = cir.exp2 {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.exp2.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.exp2.f32(float {{%.*}})
+  f = __builtin_elementwise_exp2(f);
+
+  // CIR: {{%.*}} = cir.exp2 {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.exp2.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.exp2.f64(double {{%.*}})
+  d = __builtin_elementwise_exp2(d);
+
+  // CIR: {{%.*}} = cir.exp2 {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.exp2.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.exp2.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_exp2(vf4);
+
+  // CIR: {{%.*}} = cir.exp2 {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.exp2.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.exp2.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_exp2(vd4);
+}
+
+void test_builtin_elementwise_log(float f, double d, vfloat4 vf4,
+                                  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_log
+  // LLVM-LABEL: test_builtin_elementwise_log
+  // OGCG-LABEL: test_builtin_elementwise_log
+  // CIR: {{%.*}} = cir.log {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.log.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.log.f32(float {{%.*}})
+  f = __builtin_elementwise_log(f);
+
+  // CIR: {{%.*}} = cir.log {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.log.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.log.f64(double {{%.*}})
+  d = __builtin_elementwise_log(d);
+
+  // CIR: {{%.*}} = cir.log {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.log.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.log.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_log(vf4);
+
+  // CIR: {{%.*}} = cir.log {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.log.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.log.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_log(vd4);
+}
+
+void test_builtin_elementwise_log2(float f, double d, vfloat4 vf4,
+                                    vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_log2
+  // LLVM-LABEL: test_builtin_elementwise_log2
+  // OGCG-LABEL: test_builtin_elementwise_log2
+  // CIR: {{%.*}} = cir.log2 {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.log2.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.log2.f32(float {{%.*}})
+  f = __builtin_elementwise_log2(f);
+
+  // CIR: {{%.*}} = cir.log2 {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.log2.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.log2.f64(double {{%.*}})
+  d = __builtin_elementwise_log2(d);
+
+  // CIR: {{%.*}} = cir.log2 {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.log2.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.log2.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_log2(vf4);
+
+  // CIR: {{%.*}} = cir.log2 {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.log2.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.log2.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_log2(vd4);
+}
+
+void test_builtin_elementwise_log10(float f, double d, vfloat4 vf4,
+                                     vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_log10
+  // LLVM-LABEL: test_builtin_elementwise_log10
+  // OGCG-LABEL: test_builtin_elementwise_log10
+  // CIR: {{%.*}} = cir.log10 {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.log10.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.log10.f32(float {{%.*}})
+  f = __builtin_elementwise_log10(f);
+
+  // CIR: {{%.*}} = cir.log10 {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.log10.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.log10.f64(double {{%.*}})
+  d = __builtin_elementwise_log10(d);
+
+  // CIR: {{%.*}} = cir.log10 {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.log10.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.log10.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_log10(vf4);
+
+  // CIR: {{%.*}} = cir.log10 {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.log10.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.log10.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_log10(vd4);
+}
+
+void test_builtin_elementwise_cos(float f, double d, vfloat4 vf4,
+                                     vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_cos
+  // LLVM-LABEL: test_builtin_elementwise_cos
+  // OGCG-LABEL: test_builtin_elementwise_cos
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.cos.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.cos.f32(float {{%.*}})
+  f = __builtin_elementwise_cos(f);
+
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.cos.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.cos.f64(double {{%.*}})
+  d = __builtin_elementwise_cos(d);
+
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.cos.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.cos.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_cos(vf4);
+
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.cos.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.cos.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_cos(vd4);
+}
+
+void test_builtin_elementwise_floor(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_floor
+  // LLVM-LABEL: test_builtin_elementwise_floor
+  // OGCG-LABEL: test_builtin_elementwise_floor
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.floor.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.floor.f32(float {{%.*}})
+  f = __builtin_elementwise_floor(f);
+
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.floor.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.floor.f64(double {{%.*}})
+  d = __builtin_elementwise_floor(d);
+
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.floor.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.floor.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_floor(vf4);
+
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.floor.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.floor.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_floor(vd4);
+}
+
+void test_builtin_elementwise_round(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_round
+  // LLVM-LABEL: test_builtin_elementwise_round
+  // OGCG-LABEL: test_builtin_elementwise_round
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.round.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.round.f32(float {{%.*}})
+  f = __builtin_elementwise_round(f);
+
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.round.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.round.f64(double {{%.*}})
+  d = __builtin_elementwise_round(d);
+
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.round.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.round.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_round(vf4);
+
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.round.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.round.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_round(vd4);
+}
+
+void test_builtin_elementwise_rint(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_rint
+  // LLVM-LABEL: test_builtin_elementwise_rint
+  // OGCG-LABEL: test_builtin_elementwise_rint
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.rint.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.rint.f32(float {{%.*}})
+  f = __builtin_elementwise_rint(f);
+
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.rint.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.rint.f64(double {{%.*}})
+  d = __builtin_elementwise_rint(d);
+
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.rint.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.rint.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_rint(vf4);
+
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.rint.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.rint.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_rint(vd4);
+}
+
+void test_builtin_elementwise_nearbyint(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_nearbyint
+  // LLVM-LABEL: test_builtin_elementwise_nearbyint
+  // OGCG-LABEL: test_builtin_elementwise_nearbyint
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.nearbyint.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.nearbyint.f32(float {{%.*}})
+  f = __builtin_elementwise_nearbyint(f);
+
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.nearbyint.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.nearbyint.f64(double {{%.*}})
+  d = __builtin_elementwise_nearbyint(d);
+
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_nearbyint(vf4);
+
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_nearbyint(vd4);
+}
+
+void test_builtin_elementwise_sin(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_sin
+  // LLVM-LABEL: test_builtin_elementwise_sin
+  // OGCG-LABEL: test_builtin_elementwise_sin
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.sin.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.sin.f32(float {{%.*}})
+  f = __builtin_elementwise_sin(f);
+
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.sin.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.sin.f64(double {{%.*}})
+  d = __builtin_elementwise_sin(d);
+
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.sin.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.sin.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_sin(vf4);
+
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.sin.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.sin.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_sin(vd4);
+}
+
+void test_builtin_elementwise_sqrt(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_sqrt
+  // LLVM-LABEL: test_builtin_elementwise_sqrt
+  // OGCG-LABEL: test_builtin_elementwise_sqrt
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.sqrt.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.sqrt.f32(float {{%.*}})
+  f = __builtin_elementwise_sqrt(f);
+
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.sqrt.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.sqrt.f64(double {{%.*}})
+  d = __builtin_elementwise_sqrt(d);
+
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.sqrt.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.sqrt.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_sqrt(vf4);
+
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.sqrt.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.sqrt.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_sqrt(vd4);
+}
+
+void test_builtin_elementwise_tan(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_tan
+  // LLVM-LABEL: test_builtin_elementwise_tan
+  // OGCG-LABEL: test_builtin_elementwise_tan
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.tan.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.tan.f32(float {{%.*}})
+  f = __builtin_elementwise_tan(f);
+
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.tan.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.tan.f64(double {{%.*}})
+  d = __builtin_elementwise_tan(d);
+
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.tan.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.tan.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_tan(vf4);
+
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.tan.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.tan.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_tan(vd4);
+}
+
+void test_builtin_elementwise_trunc(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_trunc
+  // LLVM-LABEL: test_builtin_elementwise_trunc
+  // OGCG-LABEL: test_builtin_elementwise_trunc
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.trunc.f32(float {{%.*}})
+  // OGCG: {{%.*}} = call float @llvm.trunc.f32(float {{%.*}})
+  f = __builtin_elementwise_trunc(f);
+
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.trunc.f64(double {{%.*}})
+  // OGCG: {{%.*}} = call double @llvm.trunc.f64(double {{%.*}})
+  d = __builtin_elementwise_trunc(d);
+
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.vector<4 x !cir.float>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.trunc.v4f32(<4 x float> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x float> @llvm.trunc.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_trunc(vf4);
+
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.vector<4 x !cir.double>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.trunc.v4f64(<4 x double> {{%.*}})
+  // OGCG: {{%.*}} = call <4 x double> @llvm.trunc.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_trunc(vd4);
+}

>From ec4d8d580a1f0cf3ad82b674e94137562691c10c Mon Sep 17 00:00:00 2001
From: Adam Smith <adams at nvidia.com>
Date: Thu, 8 Jan 2026 09:40:23 -0800
Subject: [PATCH 4/6] [CIR] Add integer abs/labs/llabs builtin intrinsic
 support

Implement support for the integer absolute value builtins (abs, labs,
llabs, __builtin_abs, __builtin_labs, __builtin_llabs) to emit cir.abs
operations instead of library calls.

The implementation handles signed overflow behavior:
- SOB_Defined (-fwrapv): emit cir.abs without poison flag
- SOB_Undefined: emit cir.abs with poison flag (allows optimization)
- SOB_Trapping: not yet implemented (llvm_unreachable)

Also migrates a deprecated builder.create<cir::AbsOp> call in the
__builtin_elementwise_abs handler to the AbsOp::create form.

Updates libc.c test to:
- Add LLVM IR verification with llvm.abs intrinsic checks
- Add -fwrapv mode tests for non-poison behavior
- Verify both poison (default) and non-poison (-fwrapv) modes
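A sketch of the two modes exercised by the updated libc.c test (comments
restate the expected CIR and LLVM output; not new functionality):

  int abs_example(int x) {
    // Default (signed overflow is UB): cir.abs %x poison : !s32i,
    // lowered to call i32 @llvm.abs.i32(i32 %x, i1 true).
    // With -fwrapv: cir.abs %x : !s32i,
    // lowered to call i32 @llvm.abs.i32(i32 %x, i1 false).
    return __builtin_abs(x);
  }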
---
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 36 ++++++++++++++++++++++---
 clang/test/CIR/CodeGen/libc.c           | 28 ++++++++++++++-----
 2 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 0d9d12f744450..892b53715c170 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -748,6 +748,36 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
     cir::VACopyOp::create(builder, dstPtr.getLoc(), dstPtr, srcPtr);
     return {};
   }
+
+  case Builtin::BIabs:
+  case Builtin::BIlabs:
+  case Builtin::BIllabs:
+  case Builtin::BI__builtin_abs:
+  case Builtin::BI__builtin_labs:
+  case Builtin::BI__builtin_llabs: {
+    bool sanitizeOverflow = sanOpts.has(SanitizerKind::SignedIntegerOverflow);
+    mlir::Value arg = emitScalarExpr(e->getArg(0));
+    mlir::Value result;
+    switch (getLangOpts().getSignedOverflowBehavior()) {
+    case LangOptions::SOB_Defined:
+      result = cir::AbsOp::create(builder, loc, arg.getType(), arg,
+                                  /*poison=*/false);
+      break;
+    case LangOptions::SOB_Undefined:
+      if (!sanitizeOverflow) {
+        result = cir::AbsOp::create(builder, loc, arg.getType(), arg,
+                                    /*poison=*/true);
+        break;
+      }
+      llvm_unreachable("BI__builtin_abs with LangOptions::SOB_Undefined when "
+                       "SanitizeOverflow is true");
+      [[fallthrough]];
+    case LangOptions::SOB_Trapping:
+      llvm_unreachable("BI__builtin_abs with LangOptions::SOB_Trapping");
+    }
+    return RValue::get(result);
+  }
+
   case Builtin::BI__assume:
   case Builtin::BI__builtin_assume: {
     if (e->getArg(0)->HasSideEffects(getContext()))
@@ -1158,9 +1188,9 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
     if (!isIntTy)
       return emitUnaryFPBuiltin<cir::FAbsOp>(*this, *e);
     mlir::Value arg = emitScalarExpr(e->getArg(0));
-    auto call = builder.create<cir::AbsOp>(getLoc(e->getExprLoc()),
-                                           arg.getType(), arg, false);
-    return RValue::get(call->getResult(0));
+    mlir::Value result = cir::AbsOp::create(builder, getLoc(e->getExprLoc()),
+                                            arg.getType(), arg, false);
+    return RValue::get(result);
   }
   case Builtin::BI__builtin_elementwise_acos:
     return emitUnaryFPBuiltin<cir::ACosOp>(*this, *e);
diff --git a/clang/test/CIR/CodeGen/libc.c b/clang/test/CIR/CodeGen/libc.c
index d4b61dab7bc42..f0801cbd63d11 100644
--- a/clang/test/CIR/CodeGen/libc.c
+++ b/clang/test/CIR/CodeGen/libc.c
@@ -1,10 +1,17 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
 // RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -fwrapv
+// RUN: FileCheck --check-prefix=CIR_NO_POISON --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -fwrapv
+// RUN: FileCheck --check-prefix=LLVM_NO_POISON --input-file=%t.ll %s
 
 // Note: In the final implementation, we will want these to generate
-//       CIR-specific libc operations. This test is just a placeholder
-//       to make sure we can compile these to normal function calls
-//       until the special handling is implemented.
+// CIR-specific libc operations. This test is just a placeholder
+// to make sure we can compile these to normal function calls
+// until the special handling is implemented.
 
 void *memcpy(void *, const void *, unsigned long);
 void testMemcpy(void *dst, const void *src, unsigned long size) {
@@ -39,17 +46,26 @@ float testFabsf(float x) {
 int abs(int);
 int testAbs(int x) {
   return abs(x);
-  // CHECK: cir.call @abs
+  // CHECK: cir.abs %{{.+}} poison : !s32i
+  // LLVM: %{{.+}} = call i32 @llvm.abs.i32(i32 %{{.+}}, i1 true)
+  // CIR_NO_POISON: cir.abs %{{.+}} : !s32i
+  // LLVM_NO_POISON: %{{.+}} = call i32 @llvm.abs.i32(i32 %{{.+}}, i1 false)
 }
 
 long labs(long);
 long testLabs(long x) {
   return labs(x);
-  // CHECK: cir.call @labs
+  // CHECK: cir.abs %{{.+}} poison : !s64i
+  // LLVM: %{{.+}} = call i64 @llvm.abs.i64(i64 %{{.+}}, i1 true)
+  // CIR_NO_POISON: cir.abs %{{.+}} : !s64i
+  // LLVM_NO_POISON: %{{.+}} = call i64 @llvm.abs.i64(i64 %{{.+}}, i1 false)
 }
 
 long long llabs(long long);
 long long testLlabs(long long x) {
   return llabs(x);
-  // CHECK: cir.call @llabs
+  // CHECK: cir.abs %{{.+}} poison : !s64i
+  // LLVM: %{{.+}} = call i64 @llvm.abs.i64(i64 %{{.+}}, i1 true)
+  // CIR_NO_POISON: cir.abs %{{.+}} : !s64i
+  // LLVM_NO_POISON: %{{.+}} = call i64 @llvm.abs.i64(i64 %{{.+}}, i1 false)
 }

>From b5128d7adbcc0e6f23266a51ec092cb06e2f12e5 Mon Sep 17 00:00:00 2001
From: Adam Smith <adams at nvidia.com>
Date: Thu, 8 Jan 2026 13:41:46 -0800
Subject: [PATCH 5/6] [CIR] Add __builtin_unpredictable and rotate builtin
 tests

- Implement __builtin_unpredictable in CIRGenBuiltin.cpp
- Add pred-info-builtins.c test for expect, expect_with_probability,
  and unpredictable builtins
- Add builtin-rotate.c test for rotate left/right builtins (8/16/32/64 bit)
- All tests include CIR, LLVM, and OGCG checks to verify CIR-produced
  LLVM matches original codegen
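For context, the rotate builtins are funnel shifts with both inputs equal;
a sketch (the classic expression shown is equivalent for shift amounts
1..31):

  unsigned rotl32_example(unsigned x, unsigned n) {
    // __builtin_rotateleft32(x, n) behaves like
    //   (x << n) | (x >> (32 - n))
    // and lowers to call i32 @llvm.fshl.i32(i32 x, i32 x, i32 n).
    return __builtin_rotateleft32(x, n);
  }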
---
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp       |   6 +
 .../test/CIR/CodeGen/builtin-floating-point.c |   6 +-
 clang/test/CIR/CodeGen/builtin-rotate.c       | 118 ++++++++++++++++++
 clang/test/CIR/CodeGen/pred-info-builtins.c   |  72 +++++++++++
 4 files changed, 200 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/builtin-rotate.c
 create mode 100644 clang/test/CIR/CodeGen/pred-info-builtins.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 892b53715c170..0f30de30bf405 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -900,6 +900,12 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
   case Builtin::BI__builtin_popcountg:
     return emitBuiltinBitOp<cir::BitPopcountOp>(*this, e);
 
+  case Builtin::BI__builtin_unpredictable: {
+    if (cgm.getCodeGenOpts().OptimizationLevel != 0)
+      assert(!cir::MissingFeatures::insertBuiltinUnpredictable());
+    return RValue::get(emitScalarExpr(e->getArg(0)));
+  }
+
   case Builtin::BI__builtin_expect:
   case Builtin::BI__builtin_expect_with_probability: {
     mlir::Value argValue = emitScalarExpr(e->getArg(0));
diff --git a/clang/test/CIR/CodeGen/builtin-floating-point.c b/clang/test/CIR/CodeGen/builtin-floating-point.c
index 097e42ef57246..a8ba62ef28a06 100644
--- a/clang/test/CIR/CodeGen/builtin-floating-point.c
+++ b/clang/test/CIR/CodeGen/builtin-floating-point.c
@@ -1,5 +1,7 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t1.cir 2>&1 | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-apple-darwin-macho -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t1.cir 2>&1 | FileCheck %s --check-prefix=AARCH64
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-apple-darwin-macho -fclangir -emit-cir %s -o %t-aarch64.cir
+// RUN: FileCheck --input-file=%t-aarch64.cir %s --check-prefix=AARCH64
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s
 // RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
 
diff --git a/clang/test/CIR/CodeGen/builtin-rotate.c b/clang/test/CIR/CodeGen/builtin-rotate.c
new file mode 100644
index 0000000000000..9f69e76f96148
--- /dev/null
+++ b/clang/test/CIR/CodeGen/builtin-rotate.c
@@ -0,0 +1,118 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+void f() {
+// CIR-LABEL: @f
+// LLVM-LABEL: @f
+// OGCG-LABEL: @f
+  unsigned int v[4];
+  unsigned int h = __builtin_rotateleft32(v[0], 1);
+// CIR: %[[CONST:.*]] = cir.const #cir.int<1> : !u32i
+// CIR: cir.rotate left {{.*}}, %[[CONST]] : !u32i
+
+// LLVM: %[[SRC:.*]] = load i32, ptr
+// LLVM: call i32 @llvm.fshl.i32(i32 %[[SRC]], i32 %[[SRC]], i32 1)
+
+// OGCG: %[[SRC:.*]] = load i32, ptr
+// OGCG: call i32 @llvm.fshl.i32(i32 %[[SRC]], i32 %[[SRC]], i32 1)
+}
+
+unsigned char rotl8(unsigned char x, unsigned char y) {
+// CIR-LABEL: rotl8
+// CIR: cir.rotate left {{.*}}, {{.*}} : !u8i
+
+// LLVM-LABEL: rotl8
+// LLVM: [[F:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 [[Y:%.*]])
+
+// OGCG-LABEL: rotl8
+// OGCG: call i8 @llvm.fshl.i8(i8 {{.*}}, i8 {{.*}}, i8 {{.*}})
+  return __builtin_rotateleft8(x, y);
+}
+
+short rotl16(short x, short y) {
+// CIR-LABEL: rotl16
+// CIR: cir.rotate left {{.*}}, {{.*}} : !u16i
+
+// LLVM-LABEL: rotl16
+// LLVM: [[F:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y:%.*]])
+
+// OGCG-LABEL: rotl16
+// OGCG: call i16 @llvm.fshl.i16(i16 {{.*}}, i16 {{.*}}, i16 {{.*}})
+  return __builtin_rotateleft16(x, y);
+}
+
+int rotl32(int x, unsigned int y) {
+// CIR-LABEL: rotl32
+// CIR: cir.rotate left {{.*}}, {{.*}} : !u32i
+
+// LLVM-LABEL: rotl32
+// LLVM: [[F:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
+
+// OGCG-LABEL: rotl32
+// OGCG: call i32 @llvm.fshl.i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+  return __builtin_rotateleft32(x, y);
+}
+
+unsigned long long rotl64(unsigned long long x, long long y) {
+// CIR-LABEL: rotl64
+// CIR: cir.rotate left {{.*}}, {{.*}} : !u64i
+
+// LLVM-LABEL: rotl64
+// LLVM: [[F:%.*]] = call i64 @llvm.fshl.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
+
+// OGCG-LABEL: rotl64
+// OGCG: call i64 @llvm.fshl.i64(i64 {{.*}}, i64 {{.*}}, i64 {{.*}})
+  return __builtin_rotateleft64(x, y);
+}
+
+char rotr8(char x, char y) {
+// CIR-LABEL: rotr8
+// CIR: cir.rotate right {{.*}}, {{.*}} : !u8i
+
+// LLVM-LABEL: rotr8
+// LLVM: [[F:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[Y:%.*]])
+
+// OGCG-LABEL: rotr8
+// OGCG: call i8 @llvm.fshr.i8(i8 {{.*}}, i8 {{.*}}, i8 {{.*}})
+  return __builtin_rotateright8(x, y);
+}
+
+unsigned short rotr16(unsigned short x, unsigned short y) {
+// CIR-LABEL: rotr16
+// CIR: cir.rotate right {{.*}}, {{.*}} : !u16i
+
+// LLVM-LABEL: rotr16
+// LLVM: [[F:%.*]] = call i16 @llvm.fshr.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y:%.*]])
+
+// OGCG-LABEL: rotr16
+// OGCG: call i16 @llvm.fshr.i16(i16 {{.*}}, i16 {{.*}}, i16 {{.*}})
+  return __builtin_rotateright16(x, y);
+}
+
+unsigned int rotr32(unsigned int x, int y) {
+// CIR-LABEL: rotr32
+// CIR: cir.rotate right {{.*}}, {{.*}} : !u32i
+
+// LLVM-LABEL: rotr32
+// LLVM: [[F:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
+
+// OGCG-LABEL: rotr32
+// OGCG: call i32 @llvm.fshr.i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+  return __builtin_rotateright32(x, y);
+}
+
+long long rotr64(long long x, unsigned long long y) {
+// CIR-LABEL: rotr64
+// CIR: cir.rotate right {{.*}}, {{.*}} : !u64i
+
+// LLVM-LABEL: rotr64
+// LLVM: [[F:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
+
+// OGCG-LABEL: rotr64
+// OGCG: call i64 @llvm.fshr.i64(i64 {{.*}}, i64 {{.*}}, i64 {{.*}})
+  return __builtin_rotateright64(x, y);
+}
\ No newline at end of file
diff --git a/clang/test/CIR/CodeGen/pred-info-builtins.c b/clang/test/CIR/CodeGen/pred-info-builtins.c
new file mode 100644
index 0000000000000..da9d42dc109f4
--- /dev/null
+++ b/clang/test/CIR/CodeGen/pred-info-builtins.c
@@ -0,0 +1,72 @@
+// RUN: %clang_cc1 -O0 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR-O0
+// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR-O2
+// RUN: %clang_cc1 -O0 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s --check-prefix=LLVM
+// RUN: %clang_cc1 -O0 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=OGCG
+
+extern void __attribute__((noinline)) bar(void);
+
+void expect(int x) {
+  if (__builtin_expect(x, 0))
+    bar();
+}
+// CIR-O0: cir.func dso_local @expect
+// CIR-O0:   cir.if {{%.*}} {
+// CIR-O0:     cir.call @bar() : () -> ()
+
+// CIR-O2: cir.func dso_local @expect
+// CIR-O2:   [[EXPECT:%.*]] = cir.expect({{.*}}, {{.*}}) : !s64i
+// CIR-O2:   [[EXPECT_BOOL:%.*]] = cir.cast int_to_bool [[EXPECT]] : !s64i -> !cir.bool
+// CIR-O2:   cir.if [[EXPECT_BOOL]]
+// CIR-O2:     cir.call @bar() : () -> ()
+
+// LLVM-LABEL: @expect
+// LLVM: br i1 {{.*}}, label %[[THEN:.*]], label %[[END:.*]]
+// LLVM: [[THEN]]:
+// LLVM: call void @bar()
+
+// OGCG-LABEL: @expect
+// OGCG: br i1 {{.*}}, label %[[THEN:.*]], label %[[END:.*]]
+// OGCG: [[THEN]]:
+// OGCG: call void @bar()
+
+void expect_with_probability(int x) {
+  if (__builtin_expect_with_probability(x, 1, 0.8))
+    bar();
+}
+// CIR-O0: cir.func dso_local @expect_with_probability
+// CIR-O0:   cir.if {{%.*}} {
+// CIR-O0:     cir.call @bar() : () -> ()
+
+// CIR-O2:  cir.func dso_local @expect_with_probability
+// CIR-O2:    [[EXPECT:%.*]] = cir.expect({{.*}}, {{.*}}, 8.000000e-01) : !s64i
+// CIR-O2:    [[EXPECT_BOOL:%.*]] = cir.cast int_to_bool [[EXPECT]] : !s64i -> !cir.bool
+// CIR-O2:    cir.if [[EXPECT_BOOL]]
+// CIR-O2:      cir.call @bar() : () -> ()
+
+// LLVM-LABEL: @expect_with_probability
+// LLVM: br i1 {{.*}}, label %[[THEN:.*]], label %[[END:.*]]
+// LLVM: [[THEN]]:
+// LLVM: call void @bar()
+
+// OGCG-LABEL: @expect_with_probability
+// OGCG: br i1 {{.*}}, label %[[THEN:.*]], label %[[END:.*]]
+// OGCG: [[THEN]]:
+// OGCG: call void @bar()
+
+void unpredictable(int x) {
+  if (__builtin_unpredictable(x > 1))
+    bar();
+}
+// CIR-O0: cir.func dso_local @unpredictable
+// CIR-O0:   cir.if {{%.*}} {
+// CIR-O0:     cir.call @bar() : () -> ()
+
+// LLVM-LABEL: @unpredictable
+// LLVM: br i1 {{.*}}, label %[[THEN:.*]], label %[[END:.*]]
+// LLVM: [[THEN]]:
+// LLVM: call void @bar()
+
+// OGCG-LABEL: @unpredictable
+// OGCG: br i1 {{.*}}, label %[[THEN:.*]], label %[[END:.*]]
+// OGCG: [[THEN]]:
+// OGCG: call void @bar()

>From de8e55d93b024b92ea191d4bde2453a46da09683 Mon Sep 17 00:00:00 2001
From: Adam Smith <adams at nvidia.com>
Date: Tue, 13 Jan 2026 14:08:04 -0800
Subject: [PATCH 6/6] [CIR] Address review feedback for math builtins PR

- Rename AbsOp 'poison' attribute to 'min_is_poison' for clarity
- Update AbsOp description to use 'minimum value for the type' instead of INT_MIN
- Add RintOp description explaining FE_INEXACT difference from nearbyint
- Rename LLroundOp/LLrintOp to LlroundOp/LlrintOp to match LLVM dialect
- Add summary and description for FP-to-int ops and binary FP ops
- Add explanatory comment for hasAttributeNoBuiltin setting
- Use errorNYI instead of llvm_unreachable for abs overflow cases
- Remove unnecessary condition around assert in unpredictable builtin
- Update libc.c test to use min_is_poison syntax
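A before/after sketch of the renamed assembly syntax (lowering is
unchanged; IR shown in comments for illustration):

  int default_abs(int x) {
    // Previously printed: cir.abs %x poison : !s32i
    // Now printed:        cir.abs %x min_is_poison : !s32i
    // Lowering:           call i32 @llvm.abs.i32(i32 %x, i1 true)
    return __builtin_abs(x);
  }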
---
 clang/include/clang/CIR/Dialect/IR/CIROps.td  | 122 +++++++++++++++---
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp       |  16 +--
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp          |   5 +
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp |  14 +-
 clang/test/CIR/CodeGen/libc.c                 |   6 +-
 5 files changed, 126 insertions(+), 37 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 7335f5ceca9f3..e907202a10353 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -5202,7 +5202,9 @@ def CIR_RintOp : CIR_UnaryFPToFPBuiltinOp<"rint", "RintOp"> {
     `cir.rint` rounds a floating-point operand to the nearest integer value
     and returns a result of the same type.
 
-    Floating-point exceptions are ignored, and it does not set `errno`.
+    This operation does not set `errno`. Unlike `cir.nearbyint`, this operation
+    may raise the `FE_INEXACT` exception if the input value is not an exact
+    integer, but this is not guaranteed to happen.
   }];
 }
 
@@ -5272,26 +5274,27 @@ def CIR_AbsOp : CIR_Op<"abs", [Pure, SameOperandsAndResultType]> {
     `cir.abs` computes the absolute value of a signed integer or vector
     of signed integers.
 
-    The `poison` attribute indicates whether the result value is a poison
-    value if the argument is statically or dynamically INT_MIN.
+    The `min_is_poison` attribute indicates whether the result value is a
+    poison value if the argument is statically or dynamically the minimum
+    value for the type.
 
     Example:
 
     ```mlir
       %0 = cir.const #cir.int<-42> : !s32i
-      %1 = cir.abs %0 poison : !s32i
+      %1 = cir.abs %0 min_is_poison : !s32i
       %2 = cir.abs %3 : !cir.vector<4 x !s32i>
     ```
   }];
 
   let arguments = (ins
     CIR_AnySIntOrVecOfSIntType:$src,
-    UnitAttr:$poison
+    UnitAttr:$min_is_poison
   );
 
   let results = (outs CIR_AnySIntOrVecOfSIntType:$result);
 
-  let assemblyFormat = "$src ( `poison` $poison^ )? `:` type($src) attr-dict";
+  let assemblyFormat = "$src ( `min_is_poison` $min_is_poison^ )? `:` type($src) attr-dict";
 }
 
 def CIR_FloorOp : CIR_UnaryFPToFPBuiltinOp<"floor", "FloorOp"> {
@@ -5329,10 +5332,38 @@ class CIR_UnaryFPToIntBuiltinOp<string mnemonic, string llvmOpName>
   let llvmOp = llvmOpName;
 }
 
-def CIR_LroundOp : CIR_UnaryFPToIntBuiltinOp<"lround", "LroundOp">;
-def CIR_LLroundOp : CIR_UnaryFPToIntBuiltinOp<"llround", "LlroundOp">;
-def CIR_LrintOp : CIR_UnaryFPToIntBuiltinOp<"lrint", "LrintOp">;
-def CIR_LLrintOp : CIR_UnaryFPToIntBuiltinOp<"llrint", "LlrintOp">;
+def CIR_LroundOp : CIR_UnaryFPToIntBuiltinOp<"lround", "LroundOp"> {
+  let summary = "Rounds floating-point to long integer";
+  let description = [{
+    `cir.lround` rounds a floating-point value to the nearest integer value,
+    rounding halfway cases away from zero, and returns the result as a `long`.
+  }];
+}
+
+def CIR_LlroundOp : CIR_UnaryFPToIntBuiltinOp<"llround", "LlroundOp"> {
+  let summary = "Rounds floating-point to long long integer";
+  let description = [{
+    `cir.llround` rounds a floating-point value to the nearest integer value,
+    rounding halfway cases away from zero, and returns the result as a
+    `long long`.
+  }];
+}
+
+def CIR_LrintOp : CIR_UnaryFPToIntBuiltinOp<"lrint", "LrintOp"> {
+  let summary = "Rounds floating-point to long integer using current rounding mode";
+  let description = [{
+    `cir.lrint` rounds a floating-point value to the nearest integer value
+    using the current rounding mode and returns the result as a `long`.
+  }];
+}
+
+def CIR_LlrintOp : CIR_UnaryFPToIntBuiltinOp<"llrint", "LlrintOp"> {
+  let summary = "Rounds floating-point to long long integer using current rounding mode";
+  let description = [{
+    `cir.llrint` rounds a floating-point value to the nearest integer value
+    using the current rounding mode and returns the result as a `long long`.
+  }];
+}
 
 class CIR_BinaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
     : CIR_Op<mnemonic, [Pure, SameOperandsAndResultType]> {
@@ -5354,14 +5385,69 @@ class CIR_BinaryFPToFPBuiltinOp<string mnemonic, string llvmOpName>
   let llvmOp = llvmOpName;
 }
 
-def CIR_CopysignOp : CIR_BinaryFPToFPBuiltinOp<"copysign", "CopySignOp">;
-def CIR_FMaxNumOp : CIR_BinaryFPToFPBuiltinOp<"fmaxnum", "MaxNumOp">;
-def CIR_FMaximumOp : CIR_BinaryFPToFPBuiltinOp<"fmaximum", "MaximumOp">;
-def CIR_FMinNumOp : CIR_BinaryFPToFPBuiltinOp<"fminnum", "MinNumOp">;
-def CIR_FMinimumOp : CIR_BinaryFPToFPBuiltinOp<"fminimum", "MinimumOp">;
-def CIR_FModOp : CIR_BinaryFPToFPBuiltinOp<"fmod", "FRemOp">;
-def CIR_PowOp : CIR_BinaryFPToFPBuiltinOp<"pow", "PowOp">;
-def CIR_ATan2Op : CIR_BinaryFPToFPBuiltinOp<"atan2", "ATan2Op">;
+def CIR_CopysignOp : CIR_BinaryFPToFPBuiltinOp<"copysign", "CopySignOp"> {
+  let summary = "Copies the sign of a floating-point value";
+  let description = [{
+    `cir.copysign` returns a value with the magnitude of the first operand
+    and the sign of the second operand.
+  }];
+}
+
+def CIR_FMaxNumOp : CIR_BinaryFPToFPBuiltinOp<"fmaxnum", "MaxNumOp"> {
+  let summary = "Returns the larger of two floating-point values";
+  let description = [{
+    `cir.fmaxnum` returns the larger of its two operands. If one operand is
+    NaN, the other operand is returned.
+  }];
+}
+
+def CIR_FMaximumOp : CIR_BinaryFPToFPBuiltinOp<"fmaximum", "MaximumOp"> {
+  let summary = "Returns the larger of two floating-point values (IEEE 754-2019)";
+  let description = [{
+    `cir.fmaximum` returns the larger of its two operands according to
+    IEEE 754-2019 semantics. If either operand is NaN, NaN is returned.
+  }];
+}
+
+def CIR_FMinNumOp : CIR_BinaryFPToFPBuiltinOp<"fminnum", "MinNumOp"> {
+  let summary = "Returns the smaller of two floating-point values";
+  let description = [{
+    `cir.fminnum` returns the smaller of its two operands. If one operand is
+    NaN, the other operand is returned.
+  }];
+}
+
+def CIR_FMinimumOp : CIR_BinaryFPToFPBuiltinOp<"fminimum", "MinimumOp"> {
+  let summary = "Returns the smaller of two floating-point values (IEEE 754-2019)";
+  let description = [{
+    `cir.fminimum` returns the smaller of its two operands according to
+    IEEE 754-2019 semantics. If either operand is NaN, NaN is returned.
+  }];
+}
+
+def CIR_FModOp : CIR_BinaryFPToFPBuiltinOp<"fmod", "FRemOp"> {
+  let summary = "Computes the floating-point remainder";
+  let description = [{
+    `cir.fmod` computes the floating-point remainder of dividing the first
+    operand by the second operand.
+  }];
+}
+
+def CIR_PowOp : CIR_BinaryFPToFPBuiltinOp<"pow", "PowOp"> {
+  let summary = "Computes the power of a floating-point value";
+  let description = [{
+    `cir.pow` computes the first operand raised to the power of the second
+    operand.
+  }];
+}
+
+def CIR_ATan2Op : CIR_BinaryFPToFPBuiltinOp<"atan2", "ATan2Op"> {
+  let summary = "Computes the arc tangent of y/x";
+  let description = [{
+    `cir.atan2` computes the arc tangent of the first operand divided by the
+    second operand, using the signs of both to determine the quadrant.
+  }];
+}
 
 //===----------------------------------------------------------------------===//
 // Variadic Operations
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 0f30de30bf405..dcf80c959b2f4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -645,7 +645,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_llroundf:
   case Builtin::BI__builtin_llroundl:
   case Builtin::BI__builtin_llroundf128:
-    return emitUnaryMaybeConstrainedFPToIntBuiltin<cir::LLroundOp>(cgf, *e);
+    return emitUnaryMaybeConstrainedFPToIntBuiltin<cir::LlroundOp>(cgf, *e);
   case Builtin::BIlrint:
   case Builtin::BIlrintf:
   case Builtin::BIlrintl:
@@ -661,7 +661,7 @@ static RValue tryEmitFPMathIntrinsic(CIRGenFunction &cgf, const CallExpr *e,
   case Builtin::BI__builtin_llrintf:
   case Builtin::BI__builtin_llrintl:
   case Builtin::BI__builtin_llrintf128:
-    return emitUnaryMaybeConstrainedFPToIntBuiltin<cir::LLrintOp>(cgf, *e);
+    return emitUnaryMaybeConstrainedFPToIntBuiltin<cir::LlrintOp>(cgf, *e);
   case Builtin::BI__builtin_ldexp:
   case Builtin::BI__builtin_ldexpf:
   case Builtin::BI__builtin_ldexpl:
@@ -761,19 +761,18 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
     switch (getLangOpts().getSignedOverflowBehavior()) {
     case LangOptions::SOB_Defined:
       result = cir::AbsOp::create(builder, loc, arg.getType(), arg,
-                                  /*poison=*/false);
+                                  /*minIsPoison=*/false);
       break;
     case LangOptions::SOB_Undefined:
       if (!sanitizeOverflow) {
         result = cir::AbsOp::create(builder, loc, arg.getType(), arg,
-                                    /*poison=*/true);
+                                    /*minIsPoison=*/true);
         break;
       }
-      llvm_unreachable("BI__builtin_abs with LangOptions::SOB_Undefined when "
-                       "SanitizeOverflow is true");
       [[fallthrough]];
     case LangOptions::SOB_Trapping:
-      llvm_unreachable("BI__builtin_abs with LangOptions::SOB_Trapping");
+      cgm.errorNYI(e->getSourceRange(), "abs with overflow handling");
+      return RValue::get(nullptr);
     }
     return RValue::get(result);
   }
@@ -901,8 +900,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
     return emitBuiltinBitOp<cir::BitPopcountOp>(*this, e);
 
   case Builtin::BI__builtin_unpredictable: {
-    if (cgm.getCodeGenOpts().OptimizationLevel != 0)
-      assert(!cir::MissingFeatures::insertBuiltinUnpredictable());
+    assert(!cir::MissingFeatures::insertBuiltinUnpredictable());
     return RValue::get(emitScalarExpr(e->getArg(0)));
   }
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 0e46faf7077c3..4fae66bebd9b5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -1869,6 +1869,11 @@ CIRGenCallee CIRGenFunction::emitDirectCallee(const GlobalDecl &gd) {
 
     bool isPredefinedLibFunction =
         cgm.getASTContext().BuiltinInfo.isPredefinedLibFunction(builtinID);
+    // TODO: Read the no-builtin function attribute and set this accordingly.
+    // Using false here matches OGCG's default behavior: builtins are called
+    // as builtins unless explicitly disabled. The previous value of true was
+    // overly conservative and caused functions to be marked noinline when
+    // they shouldn't be.
     bool hasAttributeNoBuiltin = false;
     assert(!cir::MissingFeatures::attributeNoBuiltin());
 
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index f2e65fe9f74d8..13dcab68eb724 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1708,8 +1708,8 @@ mlir::LogicalResult CIRToLLVMLroundOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
-mlir::LogicalResult CIRToLLVMLLroundOpLowering::matchAndRewrite(
-    cir::LLroundOp op, OpAdaptor adaptor,
+mlir::LogicalResult CIRToLLVMLlroundOpLowering::matchAndRewrite(
+    cir::LlroundOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
   mlir::Type resTy = typeConverter->convertType(op.getType());
   rewriter.replaceOpWithNewOp<mlir::LLVM::LlroundOp>(op, resTy,
@@ -1725,8 +1725,8 @@ mlir::LogicalResult CIRToLLVMLrintOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
-mlir::LogicalResult CIRToLLVMLLrintOpLowering::matchAndRewrite(
-    cir::LLrintOp op, OpAdaptor adaptor,
+mlir::LogicalResult CIRToLLVMLlrintOpLowering::matchAndRewrite(
+    cir::LlrintOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
   mlir::Type resTy = typeConverter->convertType(op.getType());
   rewriter.replaceOpWithNewOp<mlir::LLVM::LlrintOp>(op, resTy,
@@ -2188,9 +2188,9 @@ mlir::LogicalResult CIRToLLVMAbsOpLowering::matchAndRewrite(
     cir::AbsOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
   mlir::Type resTy = typeConverter->convertType(op.getType());
-  auto absOp =
-      mlir::LLVM::AbsOp::create(rewriter, op.getLoc(), resTy,
-                                adaptor.getOperands()[0], adaptor.getPoison());
+  auto absOp = mlir::LLVM::AbsOp::create(rewriter, op.getLoc(), resTy,
+                                         adaptor.getOperands()[0],
+                                         adaptor.getMinIsPoison());
   rewriter.replaceOp(op, absOp);
   return mlir::success();
 }
diff --git a/clang/test/CIR/CodeGen/libc.c b/clang/test/CIR/CodeGen/libc.c
index f0801cbd63d11..4d7a6a4087e92 100644
--- a/clang/test/CIR/CodeGen/libc.c
+++ b/clang/test/CIR/CodeGen/libc.c
@@ -46,7 +46,7 @@ float testFabsf(float x) {
 int abs(int);
 int testAbs(int x) {
   return abs(x);
-  // CHECK: cir.abs %{{.+}} poison : !s32i
+  // CHECK: cir.abs %{{.+}} min_is_poison : !s32i
   // LLVM: %{{.+}} = call i32 @llvm.abs.i32(i32 %{{.+}}, i1 true)
   // CIR_NO_POISON: cir.abs %{{.+}} : !s32i
   // LLVM_NO_POISON: %{{.+}} = call i32 @llvm.abs.i32(i32 %{{.+}}, i1 false)
@@ -55,7 +55,7 @@ int testAbs(int x) {
 long labs(long);
 long testLabs(long x) {
   return labs(x);
-  // CHECK: cir.abs %{{.+}} poison : !s64i
+  // CHECK: cir.abs %{{.+}} min_is_poison : !s64i
   // LLVM: %{{.+}} = call i64 @llvm.abs.i64(i64 %{{.+}}, i1 true)
   // CIR_NO_POISON: cir.abs %{{.+}} : !s64i
   // LLVM_NO_POISON: %{{.+}} = call i64 @llvm.abs.i64(i64 %{{.+}}, i1 false)
@@ -64,7 +64,7 @@ long testLabs(long x) {
 long long llabs(long long);
 long long testLlabs(long long x) {
   return llabs(x);
-  // CHECK: cir.abs %{{.+}} poison : !s64i
+  // CHECK: cir.abs %{{.+}} min_is_poison : !s64i
   // LLVM: %{{.+}} = call i64 @llvm.abs.i64(i64 %{{.+}}, i1 true)
   // CIR_NO_POISON: cir.abs %{{.+}} : !s64i
   // LLVM_NO_POISON: %{{.+}} = call i64 @llvm.abs.i64(i64 %{{.+}}, i1 false)


