[clang] [flang] [libc] [lldb] [llvm] [mlir] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add AVX512 VPTERNLOGD/VPTERNLOGQ intrinsics to be used in constexpr (PR #158703)

Wed Oct 1 05:58:52 PDT 2025

https://github.com/kimsh02 updated https://github.com/llvm/llvm-project/pull/158703

>From a1852c2b14510cb7ec76136087079f7d857d87c9 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Mon, 15 Sep 2025 10:58:34 -0700
Subject: [PATCH 01/48] [Clang] VectorExprEvaluator::VisitCallExpr /
 InterpretBuiltin - add AVX512 VPTERNLOGD/VPTERNLOGQ intrinsics to be used in
 constexpr

---
 clang/include/clang/Basic/BuiltinsX86.td   |  16 +-
 clang/lib/AST/ByteCode/InterpBuiltin.cpp   |  63 ++++-
 clang/lib/AST/ExprConstant.cpp             |  91 +++++++
 clang/test/CodeGen/X86/avx512f-builtins.c  | 138 ++++++++++
 clang/test/CodeGen/X86/avx512vl-builtins.c | 277 +++++++++++++++++++++
 5 files changed, 580 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index e98bee28c15be..0ce9bb3be9351 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -2405,28 +2405,36 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
   def psraq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
   def psrld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
   def psrlq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
+}
+
+let Features = "avx512f",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def pternlogd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
   def pternlogd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">;
   def pternlogq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
   def pternlogq512_maskz : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def pternlogd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
   def pternlogd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def pternlogd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
   def pternlogd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
   def pternlogq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
   def pternlogq128_maskz : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def pternlogq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
   def pternlogq256_maskz : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">;
 }
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a2e97fcafdfef..e04705ac7e6ee 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2874,10 +2874,57 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
   });
 
   Dst.initializeAllElements();
-
   return true;
 }
 
+/// Evaluates VPTERNLOGD/Q: each result bit is Imm[(a << 2) | (b << 1) | c].
+/// Lanes whose bit in mask U is clear are zeroed if \p MaskZ, else copy A.
+static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
+                                     const CallExpr *Call, bool MaskZ) {
+  assert(Call->getNumArgs() == 5);
+
+  const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
+  unsigned DstLen = VecT->getNumElements();
+  PrimType DstElemT = *S.getContext().classify(VecT->getElementType());
+
+  // Operands are popped last-to-first: mask (arg 4), imm (arg 3), C, B, A.
+  APSInt U = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(4)));
+  APSInt Imm = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(3)));
+  const Pointer &C = S.Stk.pop<Pointer>();
+  const Pointer &B = S.Stk.pop<Pointer>();
+  const Pointer &A = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  for (unsigned I = 0; I < DstLen; ++I) {
+    APSInt a, b, c;
+    INT_TYPE_SWITCH(DstElemT, {
+      a = A.elem<T>(I).toAPSInt();
+      b = B.elem<T>(I).toAPSInt();
+      c = C.elem<T>(I).toAPSInt();
+    });
+
+    unsigned BitWidth = a.getBitWidth();
+    bool DstUnsigned = a.isUnsigned();
+    // Starts as zero, which is already the result for a zero-masked lane.
+    APInt R(BitWidth, 0);
+
+    if (U[I]) {
+      // The three source bits select one bit of the 8-entry truth table.
+      for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+        unsigned Idx = (a[Bit] << 2) | (b[Bit] << 1) | (c[Bit]);
+        R.setBitVal(Bit, Imm[Idx]);
+      }
+    } else if (!MaskZ) {
+      R = a; // Merge-masking: an inactive lane keeps the value from A.
+    }
+    INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
+      Dst.elem<T>(I) = static_cast<T>(APSInt(R, DstUnsigned));
+    });
+  }
+  Dst.initializeAllElements();
+  return true; // Required: flowing off the end of a bool function is UB.
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
                       uint32_t BuiltinID) {
   if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3661,6 +3708,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
         S, OpPC, Call,
         [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
 
+  case X86::BI__builtin_ia32_pternlogd128_mask:
+  case X86::BI__builtin_ia32_pternlogd256_mask:
+  case X86::BI__builtin_ia32_pternlogd512_mask:
+  case X86::BI__builtin_ia32_pternlogq128_mask:
+  case X86::BI__builtin_ia32_pternlogq256_mask:
+  case X86::BI__builtin_ia32_pternlogq512_mask:
+    return interp__builtin_pternlog(S, OpPC, Call, false);
+  case X86::BI__builtin_ia32_pternlogd128_maskz:
+  case X86::BI__builtin_ia32_pternlogd256_maskz:
+  case X86::BI__builtin_ia32_pternlogd512_maskz:
+  case X86::BI__builtin_ia32_pternlogq128_maskz:
+  case X86::BI__builtin_ia32_pternlogq256_maskz:
+  case X86::BI__builtin_ia32_pternlogq512_maskz:
+    return interp__builtin_pternlog(S, OpPC, Call, true);
   case Builtin::BI__builtin_elementwise_fshl:
     return interp__builtin_elementwise_triop(S, OpPC, Call,
                                              llvm::APIntOps::fshl);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..dac6fdcd11db0 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12087,6 +12087,97 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
 
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
+  case X86::BI__builtin_ia32_pternlogd128_mask:
+  case X86::BI__builtin_ia32_pternlogd256_mask:
+  case X86::BI__builtin_ia32_pternlogd512_mask:
+  case X86::BI__builtin_ia32_pternlogq128_mask:
+  case X86::BI__builtin_ia32_pternlogq256_mask:
+  case X86::BI__builtin_ia32_pternlogq512_mask: {
+    APValue AValue, BValue, CValue, ImmValue, UValue;
+    if (!EvaluateAsRValue(Info, E->getArg(0), AValue) ||
+        !EvaluateAsRValue(Info, E->getArg(1), BValue) ||
+        !EvaluateAsRValue(Info, E->getArg(2), CValue) ||
+        !EvaluateAsRValue(Info, E->getArg(3), ImmValue) ||
+        !EvaluateAsRValue(Info, E->getArg(4), UValue))
+      return false;
+
+    QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+    bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+    APInt Imm = ImmValue.getInt();
+    APInt U = UValue.getInt();
+    unsigned ResultLen = AValue.getVectorLength();
+    SmallVector<APValue, 16> ResultElements;
+    ResultElements.reserve(ResultLen);
+
+    for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) {
+      APInt ALane = AValue.getVectorElt(EltNum).getInt();
+      APInt BLane = BValue.getVectorElt(EltNum).getInt();
+      APInt CLane = CValue.getVectorElt(EltNum).getInt();
+
+      if (U[EltNum]) {
+        unsigned BitWidth = ALane.getBitWidth();
+        APInt ResLane(BitWidth, 0);
+
+        for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+          unsigned ABit = ALane[Bit];
+          unsigned BBit = BLane[Bit];
+          unsigned CBit = CLane[Bit];
+
+          unsigned Idx = (ABit << 2) | (BBit << 1) | CBit;
+          ResLane.setBitVal(Bit, Imm[Idx]);
+        }
+        ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned)));
+      } else {
+        ResultElements.push_back(APValue(APSInt(ALane, DestUnsigned)));
+      }
+    }
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
+  case X86::BI__builtin_ia32_pternlogd128_maskz:
+  case X86::BI__builtin_ia32_pternlogd256_maskz:
+  case X86::BI__builtin_ia32_pternlogd512_maskz:
+  case X86::BI__builtin_ia32_pternlogq128_maskz:
+  case X86::BI__builtin_ia32_pternlogq256_maskz:
+  case X86::BI__builtin_ia32_pternlogq512_maskz: {
+    APValue AValue, BValue, CValue, ImmValue, UValue;
+    if (!EvaluateAsRValue(Info, E->getArg(0), AValue) ||
+        !EvaluateAsRValue(Info, E->getArg(1), BValue) ||
+        !EvaluateAsRValue(Info, E->getArg(2), CValue) ||
+        !EvaluateAsRValue(Info, E->getArg(3), ImmValue) ||
+        !EvaluateAsRValue(Info, E->getArg(4), UValue))
+      return false;
+
+    QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+    bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+    APInt Imm = ImmValue.getInt();
+    APInt U = UValue.getInt();
+    unsigned ResultLen = AValue.getVectorLength();
+    SmallVector<APValue, 16> ResultElements;
+    ResultElements.reserve(ResultLen);
+
+    for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) {
+      APInt ALane = AValue.getVectorElt(EltNum).getInt();
+      APInt BLane = BValue.getVectorElt(EltNum).getInt();
+      APInt CLane = CValue.getVectorElt(EltNum).getInt();
+
+      unsigned BitWidth = ALane.getBitWidth();
+      APInt ResLane(BitWidth, 0);
+
+      if (U[EltNum]) {
+        for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+          unsigned ABit = ALane[Bit];
+          unsigned BBit = BLane[Bit];
+          unsigned CBit = CLane[Bit];
+
+          unsigned Idx = (ABit << 2) | (BBit << 1) | CBit;
+          ResLane.setBitVal(Bit, Imm[Idx]);
+        }
+      }
+      ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned)));
+    }
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
+  
   case Builtin::BI__builtin_elementwise_clzg:
   case Builtin::BI__builtin_elementwise_ctzg: {
     APValue SourceLHS;
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 84eaad8d99e61..03cbb20ab0ed5 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -6273,6 +6273,27 @@ __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) {
   // CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 240)
   return _mm512_ternarylogic_epi32(__A, __B, __C, _MM_TERNLOG_A);
 }
+TEST_CONSTEXPR(match_v16si(
+  _mm512_ternarylogic_epi32(
+    ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+    ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+    ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
+TEST_CONSTEXPR(match_v16si(
+  _mm512_ternarylogic_epi32(
+    ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
+TEST_CONSTEXPR(match_v16si(
+  _mm512_ternarylogic_epi32(
+    ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
 
 __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
   // CHECK-LABEL: test_mm512_mask_ternarylogic_epi32
@@ -6280,6 +6301,30 @@ __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i _
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_ternarylogic_epi32(__A, __U, __B, __C, _MM_TERNLOG_B);
 }
+TEST_CONSTEXPR(match_v16si(
+  _mm512_mask_ternarylogic_epi32(
+    ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+    (__mmask16)0x3333,
+    ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+    ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0));
+TEST_CONSTEXPR(match_v16si(
+  _mm512_mask_ternarylogic_epi32(
+    ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    (__mmask16)0xCCCC,
+    ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF));
+TEST_CONSTEXPR(match_v16si(
+  _mm512_mask_ternarylogic_epi32(
+    ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    (__mmask16)0x5555,
+    ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9));
 
 __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi32
@@ -6287,12 +6332,57 @@ __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> zeroinitializer
   return _mm512_maskz_ternarylogic_epi32(__U, __A, __B, __C, _MM_TERNLOG_C);
 }
+TEST_CONSTEXPR(match_v16si(
+  _mm512_maskz_ternarylogic_epi32(
+    (__mmask16)0x3333,
+    ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+    ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+    ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
+TEST_CONSTEXPR(match_v16si(
+  _mm512_maskz_ternarylogic_epi32(
+    (__mmask16)0xCCCC,
+    ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
+TEST_CONSTEXPR(match_v16si(
+  _mm512_maskz_ternarylogic_epi32(
+    (__mmask16)0x5555,
+    ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
 
 __m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) {
   // CHECK-LABEL: test_mm512_ternarylogic_epi64
   // CHECK: @llvm.x86.avx512.pternlog.q.512({{.*}}, i32 192)
   return _mm512_ternarylogic_epi64(__A, __B, __C, _MM_TERNLOG_A & _MM_TERNLOG_B);
 }
+TEST_CONSTEXPR(match_v8di(
+  _mm512_ternarylogic_epi64(
+    ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+    ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+    ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
+TEST_CONSTEXPR(match_v8di(
+  _mm512_ternarylogic_epi64(
+    ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
+TEST_CONSTEXPR(match_v8di(
+  _mm512_ternarylogic_epi64(
+    ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
 
 __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) {
   // CHECK-LABEL: test_mm512_mask_ternarylogic_epi64
@@ -6300,6 +6390,30 @@ __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_mask_ternarylogic_epi64(__A, __U, __B, __C, _MM_TERNLOG_B | _MM_TERNLOG_C);
 }
+TEST_CONSTEXPR(match_v8di(
+  _mm512_mask_ternarylogic_epi64(
+    ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+    (__mmask8)0x33,
+    ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+    ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0));
+TEST_CONSTEXPR(match_v8di(
+  _mm512_mask_ternarylogic_epi64(
+    ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    (__mmask8)0xCC,
+    ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF));
+TEST_CONSTEXPR(match_v8di(
+  _mm512_mask_ternarylogic_epi64(
+    ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    (__mmask8)0x55,
+    ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9));
 
 __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) {
   // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi64
@@ -6307,6 +6421,30 @@ __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i _
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> zeroinitializer
   return _mm512_maskz_ternarylogic_epi64(__U, __A, __B, __C, ~_MM_TERNLOG_A | (_MM_TERNLOG_B ^ _MM_TERNLOG_C));
 }
+TEST_CONSTEXPR(match_v8di(
+  _mm512_maskz_ternarylogic_epi64(
+    (__mmask8)0x33,
+    ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+    ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+    ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
+TEST_CONSTEXPR(match_v8di(
+  _mm512_maskz_ternarylogic_epi64(
+    (__mmask8)0xCC,
+    ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
+TEST_CONSTEXPR(match_v8di(
+  _mm512_maskz_ternarylogic_epi64(
+    (__mmask8)0x55,
+    ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
 
 __m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) {
   // CHECK-LABEL: test_mm512_shuffle_f32x4
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 5282c7ab06dea..5a94532883d5f 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -8359,6 +8359,27 @@ __m128i test_mm_ternarylogic_epi32(__m128i __A, __m128i __B, __m128i __C) {
   // CHECK: @llvm.x86.avx512.pternlog.d.128
   return _mm_ternarylogic_epi32(__A, __B, __C, 4); 
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_ternarylogic_epi32(
+    ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})),
+    ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})),
+    ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })),
+    (unsigned char)0xCA),
+  0xB, 0xC, 0xB, 0xC));
+TEST_CONSTEXPR(match_v4si(
+  _mm_ternarylogic_epi32(
+    ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+    ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+    ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0xF, 0xF, 0xF, 0xF));
+TEST_CONSTEXPR(match_v4si(
+  _mm_ternarylogic_epi32(
+    ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+    ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+    ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x0, 0x0, 0x0));
 
 __m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
   // CHECK-LABEL: test_mm_mask_ternarylogic_epi32
@@ -8366,6 +8387,30 @@ __m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B,
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_ternarylogic_epi32(__A, __U, __B, __C, 4); 
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_mask_ternarylogic_epi32(
+    ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})),
+    (__mmask8)0x03,
+    ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})),
+    ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })),
+    (unsigned char)0xCA),
+  0xB, 0xC, -0x1, 0x0));
+TEST_CONSTEXPR(match_v4si(
+  _mm_mask_ternarylogic_epi32(
+    ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+    (__mmask8)0x0C,
+    ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+    ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0x9, 0x9, 0xF, 0xF));
+TEST_CONSTEXPR(match_v4si(
+  _mm_mask_ternarylogic_epi32(
+    ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+    (__mmask8)0x05,
+    ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+    ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x9, 0x0, 0x9));
 
 __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   // CHECK-LABEL: test_mm_maskz_ternarylogic_epi32
@@ -8373,12 +8418,57 @@ __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B,
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
   return _mm_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); 
 }
+TEST_CONSTEXPR(match_v4si(
+  _mm_maskz_ternarylogic_epi32(
+    (__mmask8)0x03,
+    ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})),
+    ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})),
+    ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })),
+    (unsigned char)0xCA),
+  0xB, 0xC, 0x0, 0x0));
+TEST_CONSTEXPR(match_v4si(
+  _mm_maskz_ternarylogic_epi32(
+    (__mmask8)0x0C,
+    ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+    ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+    ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0x0, 0x0, 0xF, 0xF));
+TEST_CONSTEXPR(match_v4si(
+  _mm_maskz_ternarylogic_epi32(
+    (__mmask8)0x05,
+    ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
+    ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
+    ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x0, 0x0, 0x0));
 
 __m256i test_mm256_ternarylogic_epi32(__m256i __A, __m256i __B, __m256i __C) {
   // CHECK-LABEL: test_mm256_ternarylogic_epi32
   // CHECK: @llvm.x86.avx512.pternlog.d.256
   return _mm256_ternarylogic_epi32(__A, __B, __C, 4); 
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_ternarylogic_epi32(
+    ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+    ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+    ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_ternarylogic_epi32(
+    ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_ternarylogic_epi32(
+    ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
 
 __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
   // CHECK-LABEL: test_mm256_mask_ternarylogic_epi32
@@ -8386,6 +8476,30 @@ __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_ternarylogic_epi32(__A, __U, __B, __C, 4); 
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_mask_ternarylogic_epi32(
+    ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+    (__mmask8)0x33,
+    ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+    ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_mask_ternarylogic_epi32(
+    ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    (__mmask8)0xCC,
+    ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_mask_ternarylogic_epi32(
+    ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    (__mmask8)0x55,
+    ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9));
 
 __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   // CHECK-LABEL: test_mm256_maskz_ternarylogic_epi32
@@ -8393,12 +8507,57 @@ __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i _
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> zeroinitializer
   return _mm256_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); 
 }
+TEST_CONSTEXPR(match_v8si(
+  _mm256_maskz_ternarylogic_epi32(
+    (__mmask8)0x33,
+    ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+    ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+    ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_maskz_ternarylogic_epi32(
+    (__mmask8)0xCC,
+    ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
+TEST_CONSTEXPR(match_v8si(
+  _mm256_maskz_ternarylogic_epi32(
+    (__mmask8)0x55,
+    ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+    ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
 
 __m128i test_mm_ternarylogic_epi64(__m128i __A, __m128i __B, __m128i __C) {
   // CHECK-LABEL: test_mm_ternarylogic_epi64
   // CHECK: @llvm.x86.avx512.pternlog.q.128
   return _mm_ternarylogic_epi64(__A, __B, __C, 4); 
 }
+TEST_CONSTEXPR(match_v2di(
+  _mm_ternarylogic_epi64(
+    ((__m128i)((__v2di){-0x1, 0x0})),
+    ((__m128i)((__v2di){0xB, 0xB})),
+    ((__m128i)((__v2di){0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC));
+TEST_CONSTEXPR(match_v2di(
+  _mm_ternarylogic_epi64(
+    ((__m128i)((__v2di){0x9, 0x9})),
+    ((__m128i)((__v2di){0x4, 0x4})),
+    ((__m128i)((__v2di){0x2, 0x2})),
+    (unsigned char)0xFE),
+  0xF, 0xF));
+TEST_CONSTEXPR(match_v2di(
+  _mm_ternarylogic_epi64(
+    ((__m128i)((__v2di){0x9, 0x9})),
+    ((__m128i)((__v2di){0x4, 0x4})),
+    ((__m128i)((__v2di){0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x0));
 
 __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
   // CHECK-LABEL: test_mm_mask_ternarylogic_epi64
@@ -8406,6 +8565,30 @@ __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B,
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm_mask_ternarylogic_epi64(__A, __U, __B, __C, 4); 
 }
+TEST_CONSTEXPR(match_v2di(
+  _mm_mask_ternarylogic_epi64(
+    ((__m128i)((__v2di){-0x1, 0x0})),
+    (__mmask8)0x33,
+    ((__m128i)((__v2di){0xB, 0xB})),
+    ((__m128i)((__v2di){0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC));
+TEST_CONSTEXPR(match_v2di(
+  _mm_mask_ternarylogic_epi64(
+    ((__m128i)((__v2di){0x9, 0x9})),
+    (__mmask8)0xCC,
+    ((__m128i)((__v2di){0x4, 0x4})),
+    ((__m128i)((__v2di){0x2, 0x2})),
+    (unsigned char)0xFE),
+  0x9, 0x9));
+TEST_CONSTEXPR(match_v2di(
+  _mm_mask_ternarylogic_epi64(
+    ((__m128i)((__v2di){0x9, 0x9})),
+    (__mmask8)0x55,
+    ((__m128i)((__v2di){0x4, 0x4})),
+    ((__m128i)((__v2di){0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x9));
 
 __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   // CHECK-LABEL: test_mm_maskz_ternarylogic_epi64
@@ -8413,12 +8596,57 @@ __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B,
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> zeroinitializer
   return _mm_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4); 
 }
+TEST_CONSTEXPR(match_v2di(
+  _mm_maskz_ternarylogic_epi64(
+    (__mmask8)0x03,
+    ((__m128i)((__v2di){-0x1, 0x0})),
+    ((__m128i)((__v2di){0xB, 0xB})),
+    ((__m128i)((__v2di){0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC));
+TEST_CONSTEXPR(match_v2di(
+  _mm_maskz_ternarylogic_epi64(
+    (__mmask8)0x0C,
+    ((__m128i)((__v2di){0x9, 0x9})),
+    ((__m128i)((__v2di){0x4, 0x4})),
+    ((__m128i)((__v2di){0x2, 0x2})),
+    (unsigned char)0xFE),
+  0x0, 0x0));
+TEST_CONSTEXPR(match_v2di(
+  _mm_maskz_ternarylogic_epi64(
+    (__mmask8)0x05,
+    ((__m128i)((__v2di){0x9, 0x9})),
+    ((__m128i)((__v2di){0x4, 0x4})),
+    ((__m128i)((__v2di){0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x0));
 
 __m256i test_mm256_ternarylogic_epi64(__m256i __A, __m256i __B, __m256i __C) {
   // CHECK-LABEL: test_mm256_ternarylogic_epi64
   // CHECK: @llvm.x86.avx512.pternlog.q.256
   return _mm256_ternarylogic_epi64(__A, __B, __C, 4); 
 }
+TEST_CONSTEXPR(match_v4di(
+  _mm256_ternarylogic_epi64(
+    ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})),
+    ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})),
+    ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC, 0xB, 0xC));
+TEST_CONSTEXPR(match_v4di(
+  _mm256_ternarylogic_epi64(
+    ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
+    ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
+    ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0xF, 0xF, 0xF, 0xF));
+TEST_CONSTEXPR(match_v4di(
+  _mm256_ternarylogic_epi64(
+    ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
+    ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
+    ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x0, 0x0, 0x0));
 
 __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
   // CHECK-LABEL: test_mm256_mask_ternarylogic_epi64
@@ -8426,6 +8654,30 @@ __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_mask_ternarylogic_epi64(__A, __U, __B, __C, 4); 
 }
+TEST_CONSTEXPR(match_v4di(
+  _mm256_mask_ternarylogic_epi64(
+    ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})),
+    (__mmask8)0x33,
+    ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})),
+    ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC, -0x1, 0x0));
+TEST_CONSTEXPR(match_v4di(
+  _mm256_mask_ternarylogic_epi64(
+    ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
+    (__mmask8)0xCC,
+    ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
+    ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0x9, 0x9, 0xF, 0xF));
+TEST_CONSTEXPR(match_v4di(
+  _mm256_mask_ternarylogic_epi64(
+    ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
+    (__mmask8)0x55,
+    ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
+    ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x9, 0x0, 0x9));
 
 __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   // CHECK-LABEL: test_mm256_maskz_ternarylogic_epi64
@@ -8433,6 +8685,31 @@ __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i _
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> zeroinitializer
   return _mm256_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4); 
 }
+TEST_CONSTEXPR(match_v4di(
+  _mm256_maskz_ternarylogic_epi64(
+    (__mmask8)0x33,
+    ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})),
+    ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})),
+    ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})),
+    (unsigned char)0xCA),
+  0xB, 0xC, 0x0, 0x0));
+TEST_CONSTEXPR(match_v4di(
+  _mm256_maskz_ternarylogic_epi64(
+    (__mmask8)0xCC,
+    ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
+    ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
+    ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0xFE),
+  0x0, 0x0, 0xF, 0xF));
+TEST_CONSTEXPR(match_v4di(
+  _mm256_maskz_ternarylogic_epi64(
+    (__mmask8)0x55,
+    ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
+    ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
+    ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+    (unsigned char)0x80),
+  0x0, 0x0, 0x0, 0x0));
+
 __m256 test_mm256_shuffle_f32x4(__m256 __A, __m256 __B) {
   // CHECK-LABEL: test_mm256_shuffle_f32x4
   // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>

>From 6936047dc512f64095167de6d7d3673c7c31362e Mon Sep 17 00:00:00 2001
From: Shawn <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 00:05:54 -0700
Subject: [PATCH 02/48] Apply suggestion from @tbaederr

Co-authored-by: Timm Baeder <tbaeder at redhat.com>
---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index e04705ac7e6ee..31846e0d3d0dd 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2893,7 +2893,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
 
   const Pointer &Dst = S.Stk.peek<Pointer>();
 
-  for (unsigned I = 0; I < DstLen; ++I) {
+  for (unsigned I = 0; I != DstLen; ++I) {
     APSInt a, b, c;
     INT_TYPE_SWITCH(DstElemT, {
       a = A.elem<T>(I).toAPSInt();

>From f9160faf510c98c926a8fd72d3a627f205d97c46 Mon Sep 17 00:00:00 2001
From: Shawn <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 00:06:30 -0700
Subject: [PATCH 03/48] Apply suggestion from @tbaederr

Co-authored-by: Timm Baeder <tbaeder at redhat.com>
---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 31846e0d3d0dd..748f8891d9205 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2906,7 +2906,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
     bool DstUnsigned = a.isUnsigned();
 
     if (U[I]) {
-      for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+      for (unsigned Bit = 0; Bit != BitWidth; ++Bit) {
         unsigned Idx = (a[Bit] << 2) | (b[Bit] << 1) | (c[Bit]);
         R.setBitVal(Bit, Imm[Idx]);
       }

>From c97d923a78a3ffee6b531218548e3238b8d8c433 Mon Sep 17 00:00:00 2001
From: Shawn <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 00:09:14 -0700
Subject: [PATCH 04/48] Apply suggestion from @tbaederr

Co-authored-by: Timm Baeder <tbaeder at redhat.com>
---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 748f8891d9205..c777f44f5e071 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3714,7 +3714,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_pternlogq128_mask:
   case X86::BI__builtin_ia32_pternlogq256_mask:
   case X86::BI__builtin_ia32_pternlogq512_mask:
-    return interp__builtin_pternlog(S, OpPC, Call, false);
+    return interp__builtin_pternlog(S, OpPC, Call, /*MaskZ=*/false);
   case X86::BI__builtin_ia32_pternlogd128_maskz:
   case X86::BI__builtin_ia32_pternlogd256_maskz:
   case X86::BI__builtin_ia32_pternlogd512_maskz:

>From 33b638d17c6593aa7f0aaaeae9620617375f6b25 Mon Sep 17 00:00:00 2001
From: Shawn <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 00:09:42 -0700
Subject: [PATCH 05/48] Apply suggestion from @tbaederr

Co-authored-by: Timm Baeder <tbaeder at redhat.com>
---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index c777f44f5e071..3ca0531152758 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3721,7 +3721,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_pternlogq128_maskz:
   case X86::BI__builtin_ia32_pternlogq256_maskz:
   case X86::BI__builtin_ia32_pternlogq512_maskz:
-    return interp__builtin_pternlog(S, OpPC, Call, true);
+    return interp__builtin_pternlog(S, OpPC, Call, /*MaskZ=*/true);
   case Builtin::BI__builtin_elementwise_fshl:
     return interp__builtin_elementwise_triop(S, OpPC, Call,
                                              llvm::APIntOps::fshl);

>From dd305dcec3b1318e142be4de8df8e5d406702aa9 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 00:47:53 -0700
Subject: [PATCH 06/48] Apply feedback: Use PascalCase

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 26 ++++++++++++------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 3ca0531152758..4c8abd7172109 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2894,32 +2894,32 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
   const Pointer &Dst = S.Stk.peek<Pointer>();
 
   for (unsigned I = 0; I != DstLen; ++I) {
-    APSInt a, b, c;
+    APSInt ALane, BLane, CLane;
     INT_TYPE_SWITCH(DstElemT, {
-      a = A.elem<T>(I).toAPSInt();
-      b = B.elem<T>(I).toAPSInt();
-      c = C.elem<T>(I).toAPSInt();
+      ALane = A.elem<T>(I).toAPSInt();
+      BLane = B.elem<T>(I).toAPSInt();
+      CLane = C.elem<T>(I).toAPSInt();
     });
 
-    unsigned BitWidth = a.getBitWidth();
-    APInt R(BitWidth, 0);
-    bool DstUnsigned = a.isUnsigned();
+    unsigned BitWidth = ALane.getBitWidth();
+    APInt RLane(BitWidth, 0);
+    bool DstUnsigned = ALane.isUnsigned();
 
     if (U[I]) {
       for (unsigned Bit = 0; Bit != BitWidth; ++Bit) {
-        unsigned Idx = (a[Bit] << 2) | (b[Bit] << 1) | (c[Bit]);
-        R.setBitVal(Bit, Imm[Idx]);
+        unsigned Idx = (ALane[Bit] << 2) | (BLane[Bit] << 1) | (CLane[Bit]);
+        RLane.setBitVal(Bit, Imm[Idx]);
       }
       INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
-        Dst.elem<T>(I) = static_cast<T>(APSInt(R, DstUnsigned));
+        Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
       });
     } else if (MaskZ) {
-      INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
-        Dst.elem<T>(I) = static_cast<T>(APSInt(R, DstUnsigned));
+      INT_TYPE_SWITCH_NO_BOOL(DstElemT, { /* Zeroes lane */
+        Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
       });
     } else {
       INT_TYPE_SWITCH_NO_BOOL(DstElemT,
-                              { Dst.elem<T>(I) = static_cast<T>(a); });
+                              { Dst.elem<T>(I) = static_cast<T>(ALane); });
     }
   }
   Dst.initializeAllElements();

>From 916079d5d689f975b41dc9e3cb526b59bf4ea8ba Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 16 Sep 2025 00:53:17 -0700
Subject: [PATCH 07/48] Clang-format

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 4c8abd7172109..936dd546e7442 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2914,7 +2914,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
         Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
       });
     } else if (MaskZ) {
-      INT_TYPE_SWITCH_NO_BOOL(DstElemT, { /* Zeroes lane */
+      INT_TYPE_SWITCH_NO_BOOL(DstElemT, { // Zeroes lane
         Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
       });
     } else {

>From c7222db83eb70319de37dbfe1998c3d412b82bde Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Wed, 17 Sep 2025 23:26:28 -0700
Subject: [PATCH 08/48] Apply feedback: Rebase and refactor lines

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 936dd546e7442..46cace519c3f7 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2883,10 +2883,10 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
 
   const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
   unsigned DstLen = VecT->getNumElements();
-  PrimType DstElemT = *S.getContext().classify(VecT->getElementType());
+  const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());
 
-  APSInt U = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(4)));
-  APSInt Imm = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(3)));
+  APSInt U = popToAPSInt(S, Call->getArg(4));
+  APSInt Imm = popToAPSInt(S, Call->getArg(3));
   const Pointer &C = S.Stk.pop<Pointer>();
   const Pointer &B = S.Stk.pop<Pointer>();
   const Pointer &A = S.Stk.pop<Pointer>();

>From 1d13362d91d3bc5b33da737441ab3994515666d8 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Fri, 19 Sep 2025 14:41:05 -0700
Subject: [PATCH 09/48] Apply feedback: Rebase with main

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 46cace519c3f7..f51e4a3d18c4f 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2923,6 +2923,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
     }
   }
   Dst.initializeAllElements();
+  return true;
 }
 
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,

>From fbb9ced09422389965595c93a57eaf14cd01f16f Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Fri, 19 Sep 2025 14:47:39 -0700
Subject: [PATCH 10/48] Apply feedback: Rebase with main

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index f51e4a3d18c4f..5678339597a83 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2900,7 +2900,6 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
       BLane = B.elem<T>(I).toAPSInt();
       CLane = C.elem<T>(I).toAPSInt();
     });
-
     unsigned BitWidth = ALane.getBitWidth();
     APInt RLane(BitWidth, 0);
     bool DstUnsigned = ALane.isUnsigned();

>From 97eb8465677e5d6753af18051d42595415e17699 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Mon, 22 Sep 2025 21:16:42 -0700
Subject: [PATCH 11/48] Upload failing testcase for now

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp  | 10 ++-
 clang/test/CodeGen/X86/avx512f-builtins.c | 89 +++++++++++++----------
 2 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 5678339597a83..d540f303d9134 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2882,7 +2882,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
   assert(Call->getNumArgs() == 5);
 
   const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
-  unsigned DstLen = VecT->getNumElements();
+  const unsigned DstLen = VecT->getNumElements();
   const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());
 
   APSInt U = popToAPSInt(S, Call->getArg(4));
@@ -2895,7 +2895,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
 
   for (unsigned I = 0; I != DstLen; ++I) {
     APSInt ALane, BLane, CLane;
-    INT_TYPE_SWITCH(DstElemT, {
+    INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
       ALane = A.elem<T>(I).toAPSInt();
       BLane = B.elem<T>(I).toAPSInt();
       CLane = C.elem<T>(I).toAPSInt();
@@ -2906,7 +2906,11 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
 
     if (U[I]) {
       for (unsigned Bit = 0; Bit != BitWidth; ++Bit) {
-        unsigned Idx = (ALane[Bit] << 2) | (BLane[Bit] << 1) | (CLane[Bit]);
+        unsigned ABit = ALane[Bit];
+        unsigned BBit = BLane[Bit];
+        unsigned CBit = CLane[Bit];
+
+        unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit);
         RLane.setBitVal(Bit, Imm[Idx]);
       }
       INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 03cbb20ab0ed5..5b13807014520 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -6278,21 +6278,21 @@ TEST_CONSTEXPR(match_v16si(
     ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
     ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
     ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA),
+    (unsigned char)0xCA), // A ? B : C
   0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
 TEST_CONSTEXPR(match_v16si(
   _mm512_ternarylogic_epi32(
     ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
     ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
     ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
+    (unsigned char)0xFE), // A | B | C
   0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
 TEST_CONSTEXPR(match_v16si(
   _mm512_ternarylogic_epi32(
     ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
     ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
     ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
+    (unsigned char)0x80), // A & B & C
   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
 
 __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
@@ -6303,28 +6303,37 @@ __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i _
 }
 TEST_CONSTEXPR(match_v16si(
   _mm512_mask_ternarylogic_epi32(
-    ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
-    (__mmask16)0x3333,
-    ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
-    ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA),
-  0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0));
+    ((__m512i)((__v16si){0x1, 0x0, 0x2, 0x0, 0x3, 0x0, 0x4, 0x0,
+                         0x5, 0x0, 0x6, 0x0, 0x7, 0x0, 0x8, 0x0})),
+    (__mmask16)0xA55A,
+    ((__m512i)((__v16si){0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10, 0x11,
+                         0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19})),
+    ((__m512i)((__v16si){0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+                         0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10})),
+    (unsigned char)0xCA), // A ? B : C
+  0x1, 0x2, 0x2, 0x4, 0x6, 0x0, 0x3, 0x0, 0x8, 0x0, 0xD, 0x0, 0x7, 0xE, 0x8, 0x10));
 TEST_CONSTEXPR(match_v16si(
   _mm512_mask_ternarylogic_epi32(
-    ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    (__mmask16)0xCCCC,
-    ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
-  0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF));
+    ((__m512i)((__v16si){0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
+                         0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF})),
+    (__mmask16)0x0F0F,
+    ((__m512i)((__v16si){0x1, 0x2, 0x4, 0x8, 0x1, 0x2, 0x4, 0x8,
+                         0x1, 0x2, 0x4, 0x8, 0x1, 0x2, 0x4, 0x8})),
+    ((__m512i)((__v16si){0x10, 0x20, 0x40, 0x80, 0x10, 0x20, 0x40, 0x80,
+                         0x10, 0x20, 0x40, 0x80, 0x10, 0x20, 0x40, 0x80})),
+    (unsigned char)0xFE), // A | B | C
+  0x11, 0x23, 0x46, 0x8B, 0x4, 0x5, 0x6, 0x7, 0x19, 0x2B, 0x4E, 0x8B, 0xC, 0xD, 0xE, 0xF));
 TEST_CONSTEXPR(match_v16si(
   _mm512_mask_ternarylogic_epi32(
-    ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    (__mmask16)0x5555,
-    ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9));
+    ((__m512i)((__v16si){0xF, 0x7, 0x3, 0x1, 0xF, 0x7, 0x3, 0x1,
+                         0xFF, 0xF, 0xF0, 0xAA, 0x55, 0xCC, 0x33, 0xFF})),
+    (__mmask16)0xAAAA,
+    ((__m512i)((__v16si){0xE, 0x7, 0x2, 0x1, 0xF, 0x0, 0x3, 0x0,
+                         0xF, 0xF0, 0xFF, 0x55, 0x55, 0x33, 0x33, 0xF})),
+    ((__m512i)((__v16si){0xD, 0x7, 0x0, 0x1, 0xF, 0x7, 0x0, 0x1,
+                         0xF0, 0xF, 0xF, 0xFF, 0xF, 0xCC, 0x33, 0xF0})),
+    (unsigned char)0x80), // A & B & C
+  0xF, 0x7, 0x3, 0x1, 0xF, 0x0, 0x3, 0x0, 0xFF, 0x0, 0xF0, 0x0, 0x55, 0x0, 0x33, 0x0));
 
 __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi32
@@ -6338,7 +6347,7 @@ TEST_CONSTEXPR(match_v16si(
     ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
     ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
     ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA),
+    (unsigned char)0xCA), // A ? B : C
   0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
 TEST_CONSTEXPR(match_v16si(
   _mm512_maskz_ternarylogic_epi32(
@@ -6346,7 +6355,7 @@ TEST_CONSTEXPR(match_v16si(
     ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
     ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
     ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
+    (unsigned char)0xFE), // A | B | C
   0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
 TEST_CONSTEXPR(match_v16si(
   _mm512_maskz_ternarylogic_epi32(
@@ -6354,7 +6363,7 @@ TEST_CONSTEXPR(match_v16si(
     ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
     ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
     ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
+    (unsigned char)0x80), // A & B & C
   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
 
 __m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) {
@@ -6364,25 +6373,29 @@ __m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) {
 }
 TEST_CONSTEXPR(match_v8di(
   _mm512_ternarylogic_epi64(
+    ((__m512i)((__v8di){0x1111, 0x2222, 0x3333, 0x4444, 0x5555, 0x6666, 0x7777, 0x8888})),
+    ((__m512i)((__v8di){0xAAAA, 0xBBBB, 0xCCCC, 0xDDDD, 0xEEEE, 0xFFFF, 0x1111, 0x2222})),
     ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
-    ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
-    ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA),
-  0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
+    (unsigned char)0xD8), // C ? B : A
+  0xAAAA, 0x2222, 0xCCCC, 0x4444, 0xEEEE, 0x6666, 0x1111, 0x8888));
 TEST_CONSTEXPR(match_v8di(
   _mm512_ternarylogic_epi64(
-    ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
-  0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
+    ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, 0xF0F0, 0xFF, -0x5555555555555556, 0x5555555555555555})),
+    ((__m512i)((__v8di){0x1234, 0xFFFF, 0xFF, 0xF0F, 0x3333, 0xFF00, -0x5555555555555556, -0x0F0F0F0F0F0F0F10})),
+    ((__m512i)((__v8di){0xFFFF, 0x1234, 0xF0F, 0xFF00, 0xF0F0, 0x3333, 0x5555555555555555, 0x0F0F0F0F0F0F0F0})),
+    (unsigned char)0x8F), // ~A | (B & C)
+  0x1234, -0x1, 0xF, -0x1, -0xC0C1, -0x100, 0x5555555555555555, -0x5505050505050506));
 TEST_CONSTEXPR(match_v8di(
   _mm512_ternarylogic_epi64(
-    ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
+    ((__m512i)((__v8di){0x7FFFFFFFFFFFFFFF, 0x0, 0x00FF00FF00FF00FF, 0x0F0F0F0F0F0F0F0F,
+                        0x123456789ABCDEF0, 0x3333333333333333, 0x5555555555555555, 0x0123456789ABCDEF})),
+    ((__m512i)((__v8di){0x1111111111111111, 0x2222222222222222, 0xFFFFFFFF, -0x100000000,
+                        0x0, -0x3333333333333334, -0x0F0F0F0F0F0F0F10, -0x123456789ABCDF0})),
+    ((__m512i)((__v8di){0x2222222222222222, 0x1111111111111111, -0x1000000000000, 0xFFFFFFFF,
+                        -0x1, 0x0, 0x0F0F0F0F0F0F0F0F, 0x0})),
+    (unsigned char)0xE0), // A & (B | C)
+  0x3333333333333333, 0x0, 0x00FF000000FF00FF, 0x0F0F0F0F0F0F0F0F,
+  0x123456789ABCDEF0, 0x0, 0x5555555555555555, 0x0));
 
 __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) {
   // CHECK-LABEL: test_mm512_mask_ternarylogic_epi64

>From c7c07e338eef7519607793422804deed7dec750a Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Wed, 24 Sep 2025 08:14:40 -0700
Subject: [PATCH 12/48] Save debug changes

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index d540f303d9134..13d9b01bd8e0c 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2884,9 +2884,10 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
   const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
   const unsigned DstLen = VecT->getNumElements();
   const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());
+  const bool DstUnsigned = VecT->isUnsignedIntegerOrEnumerationType();
 
-  APSInt U = popToAPSInt(S, Call->getArg(4));
-  APSInt Imm = popToAPSInt(S, Call->getArg(3));
+  APInt U = popToAPSInt(S, Call->getArg(4));
+  APInt Imm = popToAPSInt(S, Call->getArg(3));
   const Pointer &C = S.Stk.pop<Pointer>();
   const Pointer &B = S.Stk.pop<Pointer>();
   const Pointer &A = S.Stk.pop<Pointer>();
@@ -2894,7 +2895,9 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
   const Pointer &Dst = S.Stk.peek<Pointer>();
 
   for (unsigned I = 0; I != DstLen; ++I) {
-    APSInt ALane, BLane, CLane;
+    APInt ALane;
+    APInt BLane;
+    APInt CLane;
     INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
       ALane = A.elem<T>(I).toAPSInt();
       BLane = B.elem<T>(I).toAPSInt();
@@ -2902,7 +2905,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
     });
     unsigned BitWidth = ALane.getBitWidth();
     APInt RLane(BitWidth, 0);
-    bool DstUnsigned = ALane.isUnsigned();
+    // bool DstUnsigned = ALane.isUnsigned();
 
     if (U[I]) {
       for (unsigned Bit = 0; Bit != BitWidth; ++Bit) {
@@ -2922,7 +2925,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
       });
     } else {
       INT_TYPE_SWITCH_NO_BOOL(DstElemT,
-                              { Dst.elem<T>(I) = static_cast<T>(ALane); });
+                              { Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned)); });
     }
   }
   Dst.initializeAllElements();

>From b822c2d80f1cf4de7089ff4155bd19da0be08baa Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 25 Sep 2025 18:52:33 -0700
Subject: [PATCH 13/48] Save debug changes

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 13d9b01bd8e0c..ee298a92343fa 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -20,6 +20,10 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/SipHash.h"
 
+#include <iostream>
+#include "llvm/Support/Debug.h"
+#define DEBUG_TYPE "interp-builtin"
+
 namespace clang {
 namespace interp {
 
@@ -2881,6 +2885,9 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
                                      const CallExpr *Call, bool MaskZ) {
   assert(Call->getNumArgs() == 5);
 
+  // LLVM_DEBUG(llvm::dbgs() << "Debug\n");
+  std::cout << "Debug here\n";
+
   const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
   const unsigned DstLen = VecT->getNumElements();
   const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());

>From 79e065f4d6f03e2d238e3d4119e2a8c02355200e Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Sat, 27 Sep 2025 21:30:33 -0700
Subject: [PATCH 14/48] Save debug changes

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp   | 23 +++++++-------
 clang/test/CodeGen/X86/avx512f-builtins.c  |  5 +--
 clang/test/CodeGen/X86/avx512vl-builtins.c | 36 +++++++++++++++++++---
 3 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ee298a92343fa..c5cfb85ca7439 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -20,10 +20,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/SipHash.h"
 
-#include <iostream>
-#include "llvm/Support/Debug.h"
-#define DEBUG_TYPE "interp-builtin"
-
 namespace clang {
 namespace interp {
 
@@ -2885,16 +2881,13 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
                                      const CallExpr *Call, bool MaskZ) {
   assert(Call->getNumArgs() == 5);
 
-  // LLVM_DEBUG(llvm::dbgs() << "Debug\n");
-  std::cout << "Debug here\n";
-
   const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
   const unsigned DstLen = VecT->getNumElements();
   const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());
-  const bool DstUnsigned = VecT->isUnsignedIntegerOrEnumerationType();
+  const bool DstUnsigned = VecT->getElementType()->isUnsignedIntegerOrEnumerationType();
 
-  APInt U = popToAPSInt(S, Call->getArg(4));
-  APInt Imm = popToAPSInt(S, Call->getArg(3));
+  const APInt U = popToAPSInt(S, Call->getArg(4));
+  const APInt Imm = popToAPSInt(S, Call->getArg(3));
   const Pointer &C = S.Stk.pop<Pointer>();
   const Pointer &B = S.Stk.pop<Pointer>();
   const Pointer &A = S.Stk.pop<Pointer>();
@@ -2910,9 +2903,17 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
       BLane = B.elem<T>(I).toAPSInt();
       CLane = C.elem<T>(I).toAPSInt();
     });
-    unsigned BitWidth = ALane.getBitWidth();
+    const unsigned BitWidth = ALane.getBitWidth();
     APInt RLane(BitWidth, 0);
     // bool DstUnsigned = ALane.isUnsigned();
+    
+    #define DEBUG_TYPE "ptern"
+    LLVM_DEBUG({
+      ALane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
+      BLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
+      CLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
+      RLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
+    });
 
     if (U[I]) {
       for (unsigned Bit = 0; Bit != BitWidth; ++Bit) {
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 5b13807014520..7233999787218 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3,10 +3,7 @@
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
 
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+
 
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 5a94532883d5f..d786b5fb8ddbf 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -1,8 +1,7 @@
 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
 
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+
 
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
@@ -8393,7 +8392,7 @@ TEST_CONSTEXPR(match_v4si(
     (__mmask8)0x03,
     ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})),
     ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })),
-    (unsigned char)0xCA),
+    (unsigned char)0xCA), // A ? B : C
   0xB, 0xC, -0x1, 0x0));
 TEST_CONSTEXPR(match_v4si(
   _mm_mask_ternarylogic_epi32(
@@ -8401,7 +8400,7 @@ TEST_CONSTEXPR(match_v4si(
     (__mmask8)0x0C,
     ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
     ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
+    (unsigned char)0xFE), // A | B | C
   0x9, 0x9, 0xF, 0xF));
 TEST_CONSTEXPR(match_v4si(
   _mm_mask_ternarylogic_epi32(
@@ -8409,7 +8408,7 @@ TEST_CONSTEXPR(match_v4si(
     (__mmask8)0x05,
     ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
     ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
+    (unsigned char)0x80), // A & B & C
   0x0, 0x9, 0x0, 0x9));
 
 __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
@@ -8418,6 +8417,33 @@ __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B,
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
   return _mm_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); 
 }
+// B ? A : C   imm = 0xE2  (Idx = (A<<2)|(B<<1)|C per VPTERNLOG)
+TEST_CONSTEXPR(match_v4si(
+  _mm_maskz_ternarylogic_epi32(
+    (__mmask8)0x0B,
+    ((__m128i)((__v4si){(int)0xDEADBEEF, 0, (int)0xFFFFFFFF, 0x13579BDF})), // A
+    ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, 0})),          // B
+    ((__m128i)((__v4si){(int)0xCAFEBABE, (int)0xFFFFFFFF, 0, 0x2468ACE0})), // C
+    (unsigned char)0xE2),
+  (int)0xDEADBEEF, (int)0xFFFFFFFF, 0, 0x2468ACE0));
+  // ~(A & B) | ~(B & C)   imm = 0x7F
+TEST_CONSTEXPR(match_v4si(
+  _mm_maskz_ternarylogic_epi32(
+    (__mmask8)0x0C,
+    ((__m128i)((__v4si){0, (int)0xFFFFFFFF, (int)0xAAAAAAAA, 0x55555555})), // A
+    ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, (int)0xFFFFFFFF})), // B
+    ((__m128i)((__v4si){(int)0xF0F0F0F0, 0, 0, (int)0xFFFFFFFF})),           // C
+    (unsigned char)0x7F),
+  0, 0, (int)0xFFFFFFFF, (int)0xAAAAAAAA));
+  // ~A | ~B | C   imm = 0xBF
+TEST_CONSTEXPR(match_v4si(
+  _mm_maskz_ternarylogic_epi32(
+    (__mmask8)0x05,
+    ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, 0x12345678, 0})),               // A
+    ((__m128i)((__v4si){0, 0, 0x0000FFFF, (int)0xFFFFFFFF})),               // B
+    ((__m128i)((__v4si){0, 0, 0x0000000F, 0})),                             // C
+    (unsigned char)0xBF),
+  (int)0xFFFFFFFF, 0, (int)0xFFFFA98F, 0));
 TEST_CONSTEXPR(match_v4si(
   _mm_maskz_ternarylogic_epi32(
     (__mmask8)0x03,

>From e55bd08facad7349bc91ecf954417216e75054c1 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Tue, 30 Sep 2025 23:18:09 -0700
Subject: [PATCH 15/48] Add better testcases and try to debug

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp   |  74 +++-
 clang/test/CodeGen/X86/avx512f-builtins.c  | 364 ++++++++++++-----
 clang/test/CodeGen/X86/avx512vl-builtins.c | 430 +++++++++++----------
 3 files changed, 547 insertions(+), 321 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index c5cfb85ca7439..92ceed8a71fde 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2877,14 +2877,15 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
   return true;
 }
 
-static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
-                                     const CallExpr *Call, bool MaskZ) {
+static bool interp__builtin_ia32_pternlog_maskz(InterpState &S, CodePtr OpPC,
+                                                const CallExpr *Call) {
   assert(Call->getNumArgs() == 5);
 
   const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
   const unsigned DstLen = VecT->getNumElements();
   const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());
-  const bool DstUnsigned = VecT->getElementType()->isUnsignedIntegerOrEnumerationType();
+  const bool DstUnsigned =
+      VecT->getElementType()->isUnsignedIntegerOrEnumerationType();
 
   const APInt U = popToAPSInt(S, Call->getArg(4));
   const APInt Imm = popToAPSInt(S, Call->getArg(3));
@@ -2894,7 +2895,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
 
   const Pointer &Dst = S.Stk.peek<Pointer>();
 
-  for (unsigned I = 0; I != DstLen; ++I) {
+  for (unsigned I = 0; I < DstLen; ++I) {
     APInt ALane;
     APInt BLane;
     APInt CLane;
@@ -2905,18 +2906,57 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
     });
     const unsigned BitWidth = ALane.getBitWidth();
     APInt RLane(BitWidth, 0);
-    // bool DstUnsigned = ALane.isUnsigned();
-    
-    #define DEBUG_TYPE "ptern"
-    LLVM_DEBUG({
-      ALane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
-      BLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
-      CLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
-      RLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n";
+
+    if (U[I]) {
+      for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+        unsigned ABit = ALane[Bit];
+        unsigned BBit = BLane[Bit];
+        unsigned CBit = CLane[Bit];
+
+        unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit);
+        RLane.setBitVal(Bit, Imm[Idx]);
+      }
+    }
+    INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
+      Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
     });
+  }
+  Dst.initializeAllElements();
+  return true;
+}
+
+static bool interp__builtin_ia32_pternlog_mask(InterpState &S, CodePtr OpPC,
+                                               const CallExpr *Call) {
+  assert(Call->getNumArgs() == 5);
+
+  const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>();
+  const unsigned DstLen = VecT->getNumElements();
+  const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType());
+  const bool DstUnsigned =
+      VecT->getElementType()->isUnsignedIntegerOrEnumerationType();
+
+  const APInt U = popToAPSInt(S, Call->getArg(4));
+  const APInt Imm = popToAPSInt(S, Call->getArg(3));
+  const Pointer &C = S.Stk.pop<Pointer>();
+  const Pointer &B = S.Stk.pop<Pointer>();
+  const Pointer &A = S.Stk.pop<Pointer>();
+
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  for (unsigned I = 0; I < DstLen; ++I) {
+    APInt ALane;
+    APInt BLane;
+    APInt CLane;
+    INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
+      ALane = A.elem<T>(I).toAPSInt();
+      BLane = B.elem<T>(I).toAPSInt();
+      CLane = C.elem<T>(I).toAPSInt();
+    });
+    const unsigned BitWidth = ALane.getBitWidth();
+    APInt RLane(BitWidth, 0);
 
     if (U[I]) {
-      for (unsigned Bit = 0; Bit != BitWidth; ++Bit) {
+      for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
         unsigned ABit = ALane[Bit];
         unsigned BBit = BLane[Bit];
         unsigned CBit = CLane[Bit];
@@ -2927,10 +2967,6 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC,
       INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
         Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
       });
-    } else if (MaskZ) {
-      INT_TYPE_SWITCH_NO_BOOL(DstElemT, { // Zeroes lane
-        Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
-      });
     } else {
       INT_TYPE_SWITCH_NO_BOOL(DstElemT,
                               { Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned)); });
@@ -3729,14 +3765,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_pternlogq128_mask:
   case X86::BI__builtin_ia32_pternlogq256_mask:
   case X86::BI__builtin_ia32_pternlogq512_mask:
-    return interp__builtin_pternlog(S, OpPC, Call, /*MaskZ=*/false);
+    return interp__builtin_ia32_pternlog_mask(S, OpPC, Call);
   case X86::BI__builtin_ia32_pternlogd128_maskz:
   case X86::BI__builtin_ia32_pternlogd256_maskz:
   case X86::BI__builtin_ia32_pternlogd512_maskz:
   case X86::BI__builtin_ia32_pternlogq128_maskz:
   case X86::BI__builtin_ia32_pternlogq256_maskz:
   case X86::BI__builtin_ia32_pternlogq512_maskz:
-    return interp__builtin_pternlog(S, OpPC, Call, /*MaskZ=*/true);
+    return interp__builtin_ia32_pternlog_maskz(S, OpPC, Call);
   case Builtin::BI__builtin_elementwise_fshl:
     return interp__builtin_elementwise_triop(S, OpPC, Call,
                                              llvm::APIntOps::fshl);
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 7233999787218..fd446c1f22003 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3,7 +3,10 @@
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
 
-
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
 
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
@@ -6272,25 +6275,76 @@ __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) {
 }
 TEST_CONSTEXPR(match_v16si(
   _mm512_ternarylogic_epi32(
-    ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
-    ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
-    ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA), // A ? B : C
-  0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
+    ((__m512i)((__v16si){
+      0x6AA79987, (int)0xBB91433A, 0x029A7245, (int)0xD1F6F86C,
+      (int)0xD340BBCD, (int)0xCD8778E7, 0x4C73A942, (int)0xDAEA58BA,
+      0x5E503A67, (int)0xEE897110, 0x3193CA54, 0x452EC40A,
+      (int)0x90E5E945, 0x6FACAA50, 0x29645F8B, 0x5F811CB9
+    })),
+    ((__m512i)((__v16si){
+      0x1FCFF454, (int)0xDFC9E3B1, 0x6ED4E94B, 0x42D6CB5C,
+      (int)0x8FE46024, (int)0xA091250E, 0x2CA1C789, (int)0x9C9CEA0C,
+      (int)0x8D9FE5B9, 0x2FD2B7A4, 0x5ADAD121, (int)0xBCF74D7A,
+      (int)0xF543BBCF, (int)0xBB9D58E4, 0x175F0CD2, (int)0x87F26AEE
+    })),
+    ((__m512i)((__v16si){
+      (int)0xFA882692, (int)0xBC428D42, 0x6980A81F, (int)0x95C5FB98,
+      (int)0x8101E89A, 0x2AA4857E, 0x25ECE845, 0x34A9AF41,
+      (int)0xB80E3B0D, 0x13ED748B, 0x30A1F6D5, (int)0xD64A3CE0,
+      0x57708107, 0x527122DC, 0x06057C82, 0x7576714A
+    })),
+    (unsigned char)0x11), // ~B & ~C
+  0x00300929, 0x0034100C, (int)0x902B16A0, 0x28280423,
+  0x701A1741, 0x554A5A81, (int)0xD2121032, 0x434210B2,
+  0x42600042, (int)0xC0000850, (int)0x8504080A, 0x01008205,
+  0x088C4430, 0x04028503, (int)0xE8A0832D, 0x08098411));
 TEST_CONSTEXPR(match_v16si(
   _mm512_ternarylogic_epi32(
-    ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE), // A | B | C
-  0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
+    ((__m512i)((__v16si){
+      (int)0xA3B1799D, (int)0x46685257, (int)0x392456DE, (int)0xBC8960A9,
+      (int)0x6C031199, (int)0x07A0CA6E, (int)0x37F8A88B, (int)0x8B8148F6,
+      (int)0x386ECBE0, (int)0x96DA1DAC, (int)0xCE4A2BBD, (int)0xB2B9437A,
+      (int)0x571AA876, (int)0x27CD8130, (int)0x562B0F79, (int)0x17BE3111
+    })),
+    ((__m512i)((__v16si){
+      (int)0x18C26797, (int)0xD8F56413, (int)0x9A8DCA03, (int)0xCE9FF57F,
+      (int)0xBACFB3D0, (int)0x89463E85, (int)0x60E7A113, (int)0x8D5288F1,
+      (int)0xDC98D2C1, (int)0x93CD59BF, (int)0xB45ED1F0, (int)0x19DB3AD0,
+      (int)0x47294739, (int)0x5D65A441, (int)0x5EC42E08, (int)0xA5E5A5AB
+    })),
+    ((__m512i)((__v16si){
+      (int)0xBAA80DD4, (int)0x29D4BEEF, (int)0x6123FDF7, (int)0x8E944239,
+      (int)0xAF42E12F, (int)0xC6A7EE39, (int)0x50C187FC, (int)0x448AAA9E,
+      (int)0x508EBAD7, (int)0xA7CAD415, (int)0x757750A9, (int)0x43CF2FDE,
+      (int)0x95A76D79, (int)0x663F1C97, (int)0xFF5E9FF0, (int)0x827050A8
+    })),
+    (unsigned char)0x38), // (A & ~B) | (~A & B & C)
+  (int)0xBB311C08, (int)0x0E9C3644, (int)0x21219CDD, (int)0x32140090,
+  (int)0xC640A009, (int)0x86A6E46B, (int)0x57190998, (int)0x0683C006,
+  (int)0x60E61921, (int)0x05124411, (int)0x7A147A0D, (int)0xA36269AA,
+  (int)0x1033ED4F, (int)0x62A80531, (int)0x086F0171, (int)0x925A10B8));
 TEST_CONSTEXPR(match_v16si(
   _mm512_ternarylogic_epi32(
-    ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80), // A & B & C
-  0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
+    ((__m512i)((__v16si){
+      (int)0x3193CA54, (int)0x90E5E945, (int)0x29645F8B, (int)0x6ED4E94B,
+      (int)0x8D9FE5B9, (int)0x8101E89A, (int)0x25ECE845, (int)0xB80E3B0D,
+      (int)0x57708107, (int)0x06057C82, (int)0x56EAA301, (int)0xBE99854A,
+      (int)0x00E266D0, (int)0xDEEA959E, (int)0x2DCAABD5, (int)0x6A1ECCDA})),
+    ((__m512i)((__v16si){
+      (int)0x93FD7234, (int)0xBC90A6EC, (int)0xD3285151, (int)0xCE9FB6A8,
+      (int)0x3B788B66, (int)0xDF8960AD, (int)0x2F927291, (int)0x96AF0DEA,
+      (int)0xF56AE7EA, (int)0x2A04F77A, (int)0xD50B612B, (int)0x3AA725CB,
+      (int)0x8A04F74F, (int)0x282FE557, (int)0x52E1FBB0, (int)0x0CA02F4D})),
+    ((__m512i)((__v16si){
+      (int)0xB6307BAD, (int)0x141CB03E, (int)0xEBAA7701, (int)0xC9F0B072,
+      (int)0x5E2503DD, (int)0xC2E1DAC4, (int)0x0FC01B11, (int)0xA0485922,
+      (int)0x339BB47E, (int)0xB2D4F32A, (int)0x8E7AE9AF, (int)0x147DE9B0,
+      (int)0xF79FCAA0, (int)0x3B0B6398, (int)0x29DDF4C7, (int)0x49CDBEC7})),
+    (unsigned char)0xC3), // ~(A ^ B)
+  (int)0x5D91479F, (int)0xD38AB056, (int)0x05B3F125, (int)0x5FB4A01C,
+  (int)0x49189120, (int)0xA17777C8, (int)0xF581652B, (int)0xD15EC918,
+  (int)0x5DE59912, (int)0xD3FE7407, (int)0x7C1E3DD5, (int)0x7BC15F7E,
+  (int)0x75196E60, (int)0x093A8F36, (int)0x80D4AF9A, (int)0x99411C68));
 
 __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
   // CHECK-LABEL: test_mm512_mask_ternarylogic_epi32
@@ -6300,37 +6354,61 @@ __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i _
 }
 TEST_CONSTEXPR(match_v16si(
   _mm512_mask_ternarylogic_epi32(
-    ((__m512i)((__v16si){0x1, 0x0, 0x2, 0x0, 0x3, 0x0, 0x4, 0x0,
-                         0x5, 0x0, 0x6, 0x0, 0x7, 0x0, 0x8, 0x0})),
-    (__mmask16)0xA55A,
-    ((__m512i)((__v16si){0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10, 0x11,
-                         0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19})),
-    ((__m512i)((__v16si){0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
-                         0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10})),
-    (unsigned char)0xCA), // A ? B : C
-  0x1, 0x2, 0x2, 0x4, 0x6, 0x0, 0x3, 0x0, 0x8, 0x0, 0xD, 0x0, 0x7, 0xE, 0x8, 0x10));
+    _mm512_setr_epi32(
+      (int)0xFFFFFFFF, 0x00000000, (int)0xDEADBEEF, (int)0xCAFEBABE, 0x12345678, (int)0x87654321,
+      (int)0xAAAAAAAA, 0x55555555, (int)0xF00DBEEF, (int)0xBAD2FEAF, 0x0112358D, (int)0xDEADF00D,
+      (int)0x8BADF00D, (int)0xBADDCAFE, (int)0xBAADF00D, (int)0xBAAAAAAD),
+    (__mmask16)0x9D71,
+    _mm512_setr_epi32(
+      0x11111111, 0x22222222, 0x33333333, 0x44444444, (int)0xABCDEF01, (int)0xFEDCBA98,
+      (int)0xCCCCCCCC, 0x33333333, 0x1337BEEF, 0x01010101, (int)0x81321345, (int)0xBAADF00D,
+      0x1BADB002, 0x5EE7C0DE, 0x12345678, 0x55555555),
+    _mm512_setr_epi32(
+      (int)0xF0F0F0F0, 0x0F0F0F0F, 0x1234ABCD, (int)0x9876FEDC, 0x00FF00FF, (int)0xFF00FF00,
+      (int)0xFF0000FF, 0x00FFFF00, 0x50D4CAFE, (int)0x8BADF00D, (int)0xABCDEFFF, (int)0xFEEDF00D,
+      (int)0xBEEFCAFE, (int)0xDEADC0DE, (int)0x1BADBEEF, 0x33333333),
+    (unsigned char)0xB1), // op: (~B & (A | ~C)) | (B & A & C)
+  (int)0xFEFEFEFE, 0x00000000, (int)0xDEADBEEF, (int)0xCAFEBABE, 0x54341078, (int)0x87234367,
+  (int)0xAA3333AA, 0x55555555, (int)0xFC0C8BEE, (int)0xBAD2FEAF, 0x5500258D, (int)0xDFBFFFFF,
+  (int)0xCABDC50D, (int)0xBADDCAFE, (int)0xBAADF00D, (int)0xBAAAAAA9));
 TEST_CONSTEXPR(match_v16si(
   _mm512_mask_ternarylogic_epi32(
-    ((__m512i)((__v16si){0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
-                         0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF})),
-    (__mmask16)0x0F0F,
-    ((__m512i)((__v16si){0x1, 0x2, 0x4, 0x8, 0x1, 0x2, 0x4, 0x8,
-                         0x1, 0x2, 0x4, 0x8, 0x1, 0x2, 0x4, 0x8})),
-    ((__m512i)((__v16si){0x10, 0x20, 0x40, 0x80, 0x10, 0x20, 0x40, 0x80,
-                         0x10, 0x20, 0x40, 0x80, 0x10, 0x20, 0x40, 0x80})),
-    (unsigned char)0xFE), // A | B | C
-  0x11, 0x23, 0x46, 0x8B, 0x4, 0x5, 0x6, 0x7, 0x19, 0x2B, 0x4E, 0x8B, 0xC, 0xD, 0xE, 0xF));
+    _mm512_setr_epi32(
+      0x0000FFFF, (int)0xFFFF0000, 0x01010101, (int)0xFF00FF00, (int)0xAAAAAAAA, 0x33333333,
+      (int)0xF0F0F0F0, 0x0F0F0F0F, 0x12345678, (int)0x87654321, 0x7FFFFFFF, (int)0xDEADBEEF,
+      (int)0xCAFEBABE, 0x01234567, (int)0xABCDEF01, (int)0xFEDCBA98),
+    (__mmask16)0x3C3C,
+    _mm512_setr_epi32(
+      0x1111EEEE, 0x2222DDDD, (int)0x80808080, 0x00FF00FF, 0x55555555, 0x00000000,
+      (int)0xCCCCCCCC, 0x33333333, 0x11111111, 0x22222222, (int)0x80000000, 0x12345678,
+      0x11223344, (int)0xFEDCBA98, (int)0xBAD0BAD0, (int)0xBEEFCAFE),
+    _mm512_setr_epi32(
+      0x12345678, (int)0x87654321, 0x7F7F7F7F, (int)0xFEDCBA98, (int)0xCCCCCCCC, (int)0xFFFFFFFF,
+      0x11111111, 0x22222222, (int)0xABABABAB, (int)0xCDCDCDCD, 0x00000001, (int)0xFACEB00C,
+      0x55667788, (int)0xABCDEF01, 0x12345678, (int)0xDEADBEEF),
+    (unsigned char)0xE8), // op: (A & B) | (B & C) | (C & A) (Majority)
+  0x0000FFFF, (int)0xFFFF0000, 0x01010101, (int)0xFEDCBA98, (int)0xCCCCCCCC, 0x33333333,
+  (int)0xF0F0F0F0, 0x0F0F0F0F, 0x12345678, (int)0x87654321, 0x00000001, (int)0xDAACB66C,
+  0x5166338C, (int)0xABCDEF01, (int)0xABCDEF01, (int)0xFEDCBA98));
 TEST_CONSTEXPR(match_v16si(
   _mm512_mask_ternarylogic_epi32(
-    ((__m512i)((__v16si){0xF, 0x7, 0x3, 0x1, 0xF, 0x7, 0x3, 0x1,
-                         0xFF, 0xF, 0xF0, 0xAA, 0x55, 0xCC, 0x33, 0xFF})),
-    (__mmask16)0xAAAA,
-    ((__m512i)((__v16si){0xE, 0x7, 0x2, 0x1, 0xF, 0x0, 0x3, 0x0,
-                         0xF, 0xF0, 0xFF, 0x55, 0x55, 0x33, 0x33, 0xF})),
-    ((__m512i)((__v16si){0xD, 0x7, 0x0, 0x1, 0xF, 0x7, 0x0, 0x1,
-                         0xF0, 0xF, 0xF, 0xFF, 0xF, 0xCC, 0x33, 0xF0})),
-    (unsigned char)0x80), // A & B & C
-  0xF, 0x7, 0x3, 0x1, 0xF, 0x0, 0x3, 0x0, 0xFF, 0x0, 0xF0, 0x0, 0x55, 0x0, 0x33, 0x0));
+    _mm512_setr_epi32(
+      (int)0xDEADBEEF, 0x01234567, (int)0xAAAAAAAA, 0x0F0F0F0F, (int)0xBAADF00D, 0x00000001,
+      (int)0x80000000, 0x7FFFFFFF, (int)0xCAFEBABE, 0x13579BDF, (int)0xABCDEF01, (int)0xCAFEBABE,
+      (int)0xDEADBEEF, (int)0xFF00FF00, (int)0xBEEFCAFE, 0x00000001),
+    (__mmask16)0xBEEF,
+    _mm512_setr_epi32(
+      (int)0xFACEB00C, (int)0x89ABCDEF, 0x55555555, (int)0xF0F0F0F0, 0x1337C0DE, 0x00000002,
+      0x40000000, (int)0xBFFFFFFF, 0x00000000, 0x2468ACE0, 0x10FEDCBA, 0x00000000,
+      (int)0xFEEDFACE, 0x00FF00FF, 0x12345678, 0x00000002),
+    _mm512_setr_epi32(
+      0x12345678, (int)0xFFFFFFFF, (int)0xCCCCCCCC, (int)0x88888888, (int)0xDEADC0DE, 0x00000004,
+      0x20000000, (int)0xDFFFFFFF, (int)0xFFFFFFFF, (int)0xFEDCBA98, 0x55555555, (int)0xFFFFFFFF,
+      (int)0x8BADF00D, (int)0xF0F0F0F0, (int)0xFACEB00C, 0x00000003),
+    (unsigned char)0x96), // op: A ^ B ^ C (XOR3)
+  (int)0x3657589B, 0x77777777, 0x33333333, 0x77777777, (int)0xBAADF00D, 0x00000007,
+  (int)0xE0000000, 0x1FFFFFFF, (int)0xCAFEBABE, (int)0xC9E38DA7, (int)0xEE6666EE, 0x35014541,
+  (int)0xABEDB42C, 0x0F0F0F0F, (int)0xBEEFCAFE, 0x00000000));
 
 __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi32
@@ -6340,28 +6418,73 @@ __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i
 }
 TEST_CONSTEXPR(match_v16si(
   _mm512_maskz_ternarylogic_epi32(
-    (__mmask16)0x3333,
-    ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
-    ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
-    ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA), // A ? B : C
-  0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
+    (__mmask16)0x6498,
+    ((__m512i)((__v16si){
+      1393174638, 1243877629,  -826208314, 1770837977,
+     -1678093555,  -414088391, 1288769935,  703296098,
+      1428104678,   405688910,  -167788555, 1965219804,
+     -1959018749,   514303227,   754191429,  579811517})),
+    ((__m512i)((__v16si){
+     -1301280384,  -923736510,  -797648805,   475853364,
+      1247377062,   213070102,   626020209,  2037794518,
+       122183669,  1712787569, -1042441569, -1416844145,
+      1374304252, -1323427639,  1432483217,  1621706359})),
+    ((__m512i)((__v16si){
+       234227517,  -313293475,  1851213039,  -300885844,
+     -1479339544,   575183087,  -655840260, -1853668117,
+       433622095,   933629633, -1324904005,   -68434060,
+       486070655,   226865941, -1461464269,  1471789621})),
+    (unsigned char)0xAB), // C | (~A & ~B)
+   0, 0, 0, -298592082,
+  -1479042568, 0, 0, -1752969749,
+   0, 0, -1157115461, 0,
+   0, 1304818453, -1427385541, 0));
 TEST_CONSTEXPR(match_v16si(
   _mm512_maskz_ternarylogic_epi32(
-    (__mmask16)0xCCCC,
-    ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE), // A | B | C
-  0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
+    (__mmask16)0xA593,
+    ((__m512i)((__v16si){
+      1789368711,  -1148107974,   43676229,  -772343700,
+      -750732339,   -846759705,  1282648386,  -622176070,
+      1582316135,   -292982512,   831769172,  1160692746,
+     -1863980731,   1873586768,   694443915,  1602297017})),
+    ((__m512i)((__v16si){
+       533722196,   -540417103,  1859447115,  1121373020,
+     -1880858588,  -1601100530,   748799881, -1667438068,
+     -1918900807,    802338724,  1524289825, -1124643462,
+      -180110385,  -1147315996,   392105170, -2014156050})),
+    ((__m512i)((__v16si){
+       -91740526,  -1136489150,  1770039327, -1782187112,
+     -2130581350,    715425150,   636282949,   883535681,
+     -1207026931,    334328971,   815920853,  -699777824,
+      1466990855,   1383146204,   101022850,  1970696522})),
+    (unsigned char)0x21), // (~B) & ~(A ^ C)
+   1611661482,   539234310,          0,          0,
+    538610824,           0,          0,    18874368,
+    270539268,           0, -1543175586,          0,
+            0,  1075980051,          0,  1342738432));
 TEST_CONSTEXPR(match_v16si(
   _mm512_maskz_ternarylogic_epi32(
-    (__mmask16)0x5555,
-    ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80), // A & B & C
-  0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
+    (__mmask16)0xC3A5,
+    ((__m512i)((__v16si){
+      0x00000000, -0x1, (int)0x80000000, 0x7FFFFFFF,
+      (int)0xAAAAAAAA, 0x55555555, 0x00000001, (int)0xFFFFFFFE,
+      0x0000FFFF, (int)0xFFFF0000, (int)0xDEADBEEF, (int)0xCAFEBABE,
+      0x01234567, (int)0x89ABCDEF, 0x13579BDF, 0x2468ACE0})),
+    ((__m512i)((__v16si){
+      0x2468ACE0, 0x13579BDF, (int)0x89ABCDEF, 0x01234567,
+      (int)0xCAFEBABE, (int)0xDEADBEEF, (int)0xFFFF0000, 0x0000FFFF,
+      (int)0xFFFFFFFE, 0x00000001, 0x55555555, (int)0xAAAAAAAA,
+      0x7FFFFFFF, (int)0x80000000, -0x1, 0x00000000})),
+    ((__m512i)((__v16si){
+      -0x1, 0x00000000, -0x1, 0x00000000,
+      -0x1, 0x00000000, -0x1, 0x00000000,
+      -0x1, 0x00000000, -0x1, 0x00000000,
+      -0x1, 0x00000000, -0x1, 0x00000000})),
+    (unsigned char)0xC9), // F = (A & B) | (~A & ~(B ^ C))
+  0x2468ACE0, 0x0, (int)0x89ABCDEF, 0x0,
+  0x0, 0x74071445, 0x0, 0x0000FFFE,
+  (int)0xFFFFFFFE, 0x0000FFFE, 0x0, 0x0,
+  0x0, 0x0, (int)0xFFFFFFFF, (int)0xDB97531F));
 
 __m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) {
   // CHECK-LABEL: test_mm512_ternarylogic_epi64
@@ -6402,28 +6525,38 @@ __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __
 }
 TEST_CONSTEXPR(match_v8di(
   _mm512_mask_ternarylogic_epi64(
-    ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
-    (__mmask8)0x33,
-    ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
-    ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA),
-  0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0));
-TEST_CONSTEXPR(match_v8di(
-  _mm512_mask_ternarylogic_epi64(
-    ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    (__mmask8)0xCC,
-    ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
-  0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF));
+    ((__m512i)((__v8di){0x0LL, 0x1LL, 0x2LL, 0x3LL, 0x4LL, 0x5LL, 0x6LL, 0x7LL})),
+    (__mmask8)0xFF,
+    ((__m512i)((__v8di){0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL, 0x1LL})),
+    ((__m512i)((__v8di){0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL, 0x0LL})),
+    (unsigned char)0x96),
+  0x1, 0x0, 0x3, 0x2, 0x5, 0x4, 0x7, 0x6));
 TEST_CONSTEXPR(match_v8di(
   _mm512_mask_ternarylogic_epi64(
-    ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    (__mmask8)0x55,
-    ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9));
+    ((__m512i)((__v8di){
+      (long long)0x9FD641D41C6A70FEULL, (long long)0xB51D9082CF18D398ULL,
+      (long long)0x730E520285F4D01BULL, (long long)0x347E72CE341FD932ULL,
+      (long long)0x438F8D9BEA5D486FULL, (long long)0xFDB554A5DEEF750DULL,
+      (long long)0x0ABAA254BFFC2308ULL, (long long)0x825FE29BF1D51FC6ULL
+    })),
+    (__mmask8)0xE4,
+    ((__m512i)((__v8di){
+      (long long)0xC1779B12FA832A6EULL, (long long)0xCF6E876B587C4762ULL,
+      (long long)0x25DC09833D4ECA24ULL, (long long)0x34E55E25691BB80AULL,
+      (long long)0x9A02450CD8F20DD7ULL, (long long)0x78B9E240FB5B77A9ULL,
+      (long long)0xE1F37F76C1162596ULL, (long long)0xDCCB561738CE2941ULL
+    })),
+    ((__m512i)((__v8di){
+      (long long)0xD13840986BC8DC3CULL, (long long)0x34CDE7E8C960187EULL,
+      (long long)0x7EE068D9D111EEB8ULL, (long long)0xAD11149DE686B811ULL,
+      (long long)0x849F38BFD9AB0DFAULL, (long long)0x5C28948ED106227BULL,
+      (long long)0xFB1918D4A18E304DULL, (long long)0x4EDE6944F84AD59FULL
+    })),
+    (unsigned char)0x67),
+  (long long)0x9FD641D41C6A70FEULL, (long long)0xB51D9082CF18D398ULL,
+  (long long)0xDB3DE57EEE5F25DCULL, (long long)0x347E72CE341FD932ULL,
+  (long long)0x438F8D9BEA5D486FULL, (long long)0x26D37FDE2A5DDDD2ULL,
+  (long long)0x1EEE67AB6099DDFBULL, (long long)0xB3353F73C6A4FCFEULL));
 
 __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) {
   // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi64
@@ -6433,28 +6566,57 @@ __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i _
 }
 TEST_CONSTEXPR(match_v8di(
   _mm512_maskz_ternarylogic_epi64(
-    (__mmask8)0x33,
-    ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
-    ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
-    ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA),
-  0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
-TEST_CONSTEXPR(match_v8di(
-  _mm512_maskz_ternarylogic_epi64(
-    (__mmask8)0xCC,
-    ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
-  0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
+    (__mmask8)0x6D,
+    ((__m512i)((__v8di){
+      (long long)0xFFFFFFFFFFFFFFFF, (long long)0x0000000000000000,
+      (long long)0x0000FFFF0000FFFF, (long long)0x5555555555555555,
+      (long long)0x0123456789ABCDEF, (long long)0x1122334455667788,
+      (long long)0x00000000FFFFFFFF, (long long)0x0F0F0F0F0F0F0F0F
+    })),
+    ((__m512i)((__v8di){
+      (long long)0x000000000000000B, (long long)0x000000000000000C,
+      (long long)0x00000000FFFF0000, (long long)0x3333333333333333,
+      (long long)0x0FEDCBA987654321, (long long)0x1111111111111111,
+      (long long)0x7FFFFFFFFFFFFFFF, (long long)0x2222222222222222
+    })),
+    ((__m512i)((__v8di){
+      (long long)0x000000000000000C, (long long)0x000000000000000B,
+      (long long)0x00F0F0F0F0F0F0F0, (long long)0x5555555555555555,
+      (long long)0x0000000000000000, (long long)0x7FFFFFFFFFFFFFFF,
+      (long long)0x0000000000000001, (long long)0x2222222222222222
+    })),
+    (unsigned char)0x89),
+  (long long)0x0000000000000008, (long long)0x0000000000000000,
+  (long long)0xFF0F0000F0F00000, (long long)0x9999999999999999,
+  (long long)0x0000000000000000, (long long)0x9111111111111111,
+  (long long)0x8000000000000001, (long long)0x0000000000000000));
+
 TEST_CONSTEXPR(match_v8di(
   _mm512_maskz_ternarylogic_epi64(
-    (__mmask8)0x55,
-    ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
+    (__mmask8)0x6D,
+    ((__m512i)((__v8di){
+      (long long)0xFFFFFFFFFFFFFFFF, (long long)0x0000000000000000,
+      (long long)0x0000FFFF0000FFFF, (long long)0x5555555555555555,
+      (long long)0x0123456789ABCDEF, (long long)0x1122334455667788,
+      (long long)0x00000000FFFFFFFF, (long long)0x0F0F0F0F0F0F0F0F
+    })),
+    ((__m512i)((__v8di){
+      (long long)0x000000000000000B, (long long)0x000000000000000C,
+      (long long)0x00000000FFFF0000, (long long)0x3333333333333333,
+      (long long)0x0FEDCBA987654321, (long long)0x1111111111111111,
+      (long long)0x7FFFFFFFFFFFFFFF, (long long)0x2222222222222222
+    })),
+    ((__m512i)((__v8di){
+      (long long)0x000000000000000C, (long long)0x000000000000000B,
+      (long long)0x00F0F0F0F0F0F0F0, (long long)0x5555555555555555,
+      (long long)0x0000000000000000, (long long)0x7FFFFFFFFFFFFFFF,
+      (long long)0x0000000000000001, (long long)0x2222222222222222
+    })),
+    (unsigned char)0x29),
+  (long long)0x0000000000000004, (long long)0x0000000000000000,
+  (long long)0xFF0FF0F0F0F0F0F0, (long long)0xCCCCCCCCCCCCCCCC,
+  (long long)0x0000000000000000, (long long)0x8033225544776699,
+  (long long)0x8000000000000000, (long long)0x0000000000000000));
 
 __m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) {
   // CHECK-LABEL: test_mm512_shuffle_f32x4
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index d786b5fb8ddbf..3104ba3cfd5aa 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -1,7 +1,8 @@
 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
 
-
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
 
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
@@ -8360,25 +8361,25 @@ __m128i test_mm_ternarylogic_epi32(__m128i __A, __m128i __B, __m128i __C) {
 }
 TEST_CONSTEXPR(match_v4si(
   _mm_ternarylogic_epi32(
-    ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})),
-    ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})),
-    ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })),
-    (unsigned char)0xCA),
-  0xB, 0xC, 0xB, 0xC));
+    ((__m128i)((__v4si){(int)0x7FFFFFFF, (int)0x80000000, (int)0xAAAAAAAA, 0x00000000})),
+    ((__m128i)((__v4si){0x00000000, (int)0xFFFFFFFF, 0x12345678, (int)0xFFFFFFFF})),
+    ((__m128i)((__v4si){(int)0xCAFEBABE, 0x0F0F0F0F, (int)0xFFFFFFFF, 0x00000000})),
+    (unsigned char)0xCA), /* A ? B : C */
+  (int)0x80000000, (int)0x8F0F0F0F, 0x5775577D, 0x00000000));
 TEST_CONSTEXPR(match_v4si(
   _mm_ternarylogic_epi32(
-    ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
-    ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
-    ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
-  0xF, 0xF, 0xF, 0xF));
+    ((__m128i)((__v4si){0x12345678, (int)0x80000000, 0x00000000, (int)0xAAAAAAAA})),
+    ((__m128i)((__v4si){0x0000FFFF, 0x7FFFFFFF, 0x55555555, 0x00000000})),
+    ((__m128i)((__v4si){(int)0xF0F0F0F0, 0x00000001, 0x0F0F0F0F, 0x33333333})),
+    (unsigned char)0xFE), /* A | B | C */
+  (int)0xF2F4FFFF, (int)0xFFFFFFFF, 0x5F5F5F5F, (int)0xBBBBBBBB));
 TEST_CONSTEXPR(match_v4si(
   _mm_ternarylogic_epi32(
-    ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
-    ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
-    ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x0, 0x0, 0x0));
+    ((__m128i)((__v4si){(int)0xFFFFFFFF, 0x12345678, (int)0x80000000, 0x0F0F0F0F})),
+    ((__m128i)((__v4si){0x00FF00FF, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0xF0F0F0F0})),
+    ((__m128i)((__v4si){0x0F0F0F0F, 0x00FF00FF, (int)0xFFFFFFFF, (int)0xFFFFFFFF})),
+    (unsigned char)0x80), /* A & B & C */
+  0x000F000F, 0x00340078, 0x00000000, 0x00000000));
 
 __m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
   // CHECK-LABEL: test_mm_mask_ternarylogic_epi32
@@ -8417,57 +8418,30 @@ __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B,
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
   return _mm_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); 
 }
-// B ? A : C   imm = 0xE2  (Idx = (A<<2)|(B<<1)|C per VPTERNLOG)
 TEST_CONSTEXPR(match_v4si(
   _mm_maskz_ternarylogic_epi32(
     (__mmask8)0x0B,
-    ((__m128i)((__v4si){(int)0xDEADBEEF, 0, (int)0xFFFFFFFF, 0x13579BDF})), // A
-    ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, 0})),          // B
-    ((__m128i)((__v4si){(int)0xCAFEBABE, (int)0xFFFFFFFF, 0, 0x2468ACE0})), // C
-    (unsigned char)0xE2),
+    ((__m128i)((__v4si){(int)0xDEADBEEF, 0, (int)0xFFFFFFFF, 0x13579BDF})),
+    ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, 0})),
+    ((__m128i)((__v4si){(int)0xCAFEBABE, (int)0xFFFFFFFF, 0, 0x2468ACE0})),
+    (unsigned char)0xE2), // B ? A : C
   (int)0xDEADBEEF, (int)0xFFFFFFFF, 0, 0x2468ACE0));
-  // ~(A & B) | ~(B & C)   imm = 0x7F
 TEST_CONSTEXPR(match_v4si(
   _mm_maskz_ternarylogic_epi32(
     (__mmask8)0x0C,
-    ((__m128i)((__v4si){0, (int)0xFFFFFFFF, (int)0xAAAAAAAA, 0x55555555})), // A
-    ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, (int)0xFFFFFFFF})), // B
-    ((__m128i)((__v4si){(int)0xF0F0F0F0, 0, 0, (int)0xFFFFFFFF})),           // C
-    (unsigned char)0x7F),
+    ((__m128i)((__v4si){0, (int)0xFFFFFFFF, (int)0xAAAAAAAA, 0x55555555})),
+    ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, (int)0xFFFFFFFF})),
+    ((__m128i)((__v4si){(int)0xF0F0F0F0, 0, 0, (int)0xFFFFFFFF})),
+    (unsigned char)0x7F),   // ~(A & B & C)
   0, 0, (int)0xFFFFFFFF, (int)0xAAAAAAAA));
-  // ~A | ~B | C   imm = 0xBF
 TEST_CONSTEXPR(match_v4si(
   _mm_maskz_ternarylogic_epi32(
     (__mmask8)0x05,
-    ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, 0x12345678, 0})),               // A
-    ((__m128i)((__v4si){0, 0, 0x0000FFFF, (int)0xFFFFFFFF})),               // B
-    ((__m128i)((__v4si){0, 0, 0x0000000F, 0})),                             // C
-    (unsigned char)0xBF),
+    ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, 0x12345678, 0})),
+    ((__m128i)((__v4si){0, 0, 0x0000FFFF, (int)0xFFFFFFFF})),
+    ((__m128i)((__v4si){0, 0, 0x0000000F, 0})),
+    (unsigned char)0xBF),   // ~A | ~B | C
   (int)0xFFFFFFFF, 0, (int)0xFFFFA98F, 0));
-TEST_CONSTEXPR(match_v4si(
-  _mm_maskz_ternarylogic_epi32(
-    (__mmask8)0x03,
-    ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})),
-    ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})),
-    ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })),
-    (unsigned char)0xCA),
-  0xB, 0xC, 0x0, 0x0));
-TEST_CONSTEXPR(match_v4si(
-  _mm_maskz_ternarylogic_epi32(
-    (__mmask8)0x0C,
-    ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
-    ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
-    ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
-  0x0, 0x0, 0xF, 0xF));
-TEST_CONSTEXPR(match_v4si(
-  _mm_maskz_ternarylogic_epi32(
-    (__mmask8)0x05,
-    ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})),
-    ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})),
-    ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x0, 0x0, 0x0));
 
 __m256i test_mm256_ternarylogic_epi32(__m256i __A, __m256i __B, __m256i __C) {
   // CHECK-LABEL: test_mm256_ternarylogic_epi32
@@ -8476,25 +8450,25 @@ __m256i test_mm256_ternarylogic_epi32(__m256i __A, __m256i __B, __m256i __C) {
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_ternarylogic_epi32(
-    ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
-    ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
-    ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA),
-  0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
+    ((__m256i)((__v8si){0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0x80000000, 0x00FF00FF, (int)0xF0F0F0F0, (int)0xAAAAAAAA})),
+    ((__m256i)((__v8si){(int)0xDEADBEEF, 0x11111111, 0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777})),
+    ((__m256i)((__v8si){(int)0xCAFEBABE, (int)0x88888888, (int)0x99999999, (int)0xAAAAAAAA, (int)0xBBBBBBBB, (int)0xCCCCCCCC, (int)0xDDDDDDDD, (int)0xFFFFFFFF})),
+    (unsigned char)0xF0), /* A */
+  0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0x80000000, 0x00FF00FF, (int)0xF0F0F0F0, (int)0xAAAAAAAA));
 TEST_CONSTEXPR(match_v8si(
   _mm256_ternarylogic_epi32(
-    ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
-  0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
+    ((__m256i)((__v8si){0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x7FFFFFFF, (int)0x80000000, 0x00FF00FF, (int)0xF0F0F0F0, (int)0xAAAAAAAA})),
+    ((__m256i)((__v8si){(int)0xAAAAAAAA, (int)0xBBBBBBBB, (int)0xCCCCCCCC, (int)0xDDDDDDDD, (int)0xEEEEEEEE, (int)0xFFFFFFFF, 0x00000000, 0x11111111})),
+    ((__m256i)((__v8si){0x22222222, 0x33333333, 0x44444444, 0x55555555, 0x66666666, 0x77777777, (int)0x88888888, (int)0x99999999})),
+    (unsigned char)0x0F), /* ~A */
+  (int)0xEDCBA987, (int)0xFFFFFFFF, 0x00000000, (int)0x80000000, 0x7FFFFFFF, (int)0xFF00FF00, 0x0F0F0F0F, 0x55555555));
 TEST_CONSTEXPR(match_v8si(
   _mm256_ternarylogic_epi32(
-    ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
+    ((__m256i)((__v8si){0x0F0F0F0F, (int)0xAAAAAAAA, 0x12345678, 0x00000000, (int)0xFFFFFFFF, 0x13579BDF, (int)0x80000000, 0x7FFFFFFF})),
+    ((__m256i)((__v8si){(int)0xF0F0F0F0, 0x55555555, 0x11111111, (int)0xFFFFFFFF, 0x00000000, 0x02468ACE, 0x7FFFFFFF, (int)0x80000000})),
+    ((__m256i)((__v8si){(int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA, (int)0xAAAAAAAA})),
+    (unsigned char)0x3C), /* A ^ B */
+  (int)0xFFFFFFFF, (int)0xFFFFFFFF, 0x03254769, (int)0xFFFFFFFF, (int)0xFFFFFFFF, 0x11111111, (int)0xFFFFFFFF, (int)0xFFFFFFFF));
 
 __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
   // CHECK-LABEL: test_mm256_mask_ternarylogic_epi32
@@ -8504,28 +8478,28 @@ __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_mask_ternarylogic_epi32(
-    ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
-    (__mmask8)0x33,
-    ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
-    ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA),
-  0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0));
+    ((__m256i)((__v8si){(int)0xFFFFFFFF, 0x00000000, 0x12345678, (int)0xAAAAAAAA, 0x7FFFFFFF, (int)0x80000000, 0x13579BDF, 0x2468ACE0})),
+    (__mmask8)0xA5,
+    ((__m256i)((__v8si){(int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x00000000})),
+    ((__m256i)((__v8si){0x00000000, (int)0xFFFFFFFF, 0x55555555, 0x33333333, (int)0x89ABCDEF, 0x00000000, (int)0xFFFFFFFF, 0x11111111})),
+    (unsigned char)0xE2), /* B ? A : C */
+  (int)0xFFFFFFFF, 0x00000000, 0x12345678, (int)0xAAAAAAAA, 0x7FFFFFFF, 0x00000000, 0x13579BDF, 0x11111111));
 TEST_CONSTEXPR(match_v8si(
   _mm256_mask_ternarylogic_epi32(
-    ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    (__mmask8)0xCC,
-    ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
-  0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF));
+    ((__m256i)((__v8si){0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F})),
+    (__mmask8)0xFF,
+    ((__m256i)((__v8si){0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF})),
+    ((__m256i)((__v8si){0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333})),
+    (unsigned char)0x96), /* A ^ B ^ C */
+  0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3, 0x3CC33CC3));
 TEST_CONSTEXPR(match_v8si(
   _mm256_mask_ternarylogic_epi32(
-    ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    (__mmask8)0x55,
-    ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
-    ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9));
+    ((__m256i)((__v8si){(int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x12345678, (int)0xAAAAAAAA, 0x55555555, (int)0x80000000, 0x7FFFFFFF})),
+    (__mmask8)0x5A,
+    ((__m256i)((__v8si){0x00000000, (int)0xFFFFFFFF, 0x11111111, (int)0xFFFFFFFF, 0x55555555, (int)0xAAAAAAAA, (int)0x80000000, 0x7FFFFFFF})),
+    ((__m256i)((__v8si){0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000})),
+    (unsigned char)0xC0), /* A & B */
+  (int)0xFFFFFFFF, 0x00000000, (int)0xFFFFFFFF, 0x12345678, 0x00000000, 0x55555555, (int)0x80000000, 0x7FFFFFFF));
 
 __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   // CHECK-LABEL: test_mm256_maskz_ternarylogic_epi32
@@ -8535,28 +8509,28 @@ __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i _
 }
 TEST_CONSTEXPR(match_v8si(
   _mm256_maskz_ternarylogic_epi32(
-    (__mmask8)0x33,
-    ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+    (__mmask8)0x6D,
+    ((__m256i)((__v8si){(int)-1, 0, (int)-1, 0, (int)-1, 0, (int)-1, 0})),
     ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
     ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA),
-  0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
+    (unsigned char)0x30), /* A & ~B */
+  (int)0xFFFFFFF4, 0, (int)0xFFFFFFF4, 0, 0, 0, (int)0xFFFFFFF4, 0));
 TEST_CONSTEXPR(match_v8si(
   _mm256_maskz_ternarylogic_epi32(
-    (__mmask8)0xCC,
+    (__mmask8)0x90,
     ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
     ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
     ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
-  0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
+    (unsigned char)0x44), /* B & ~C */
+  0, 0, 0, 0, 0x4, 0, 0, 0x4));
 TEST_CONSTEXPR(match_v8si(
   _mm256_maskz_ternarylogic_epi32(
-    (__mmask8)0x55,
-    ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
-    ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+    (__mmask8)0x0F,
+    ((__m256i)((__v8si){0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3})),
+    ((__m256i)((__v8si){0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1})),
     ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
+    (unsigned char)0x28), /* (A ^ B) & C */
+  0x2, 0x2, 0x2, 0x2, 0, 0, 0, 0));
 
 __m128i test_mm_ternarylogic_epi64(__m128i __A, __m128i __B, __m128i __C) {
   // CHECK-LABEL: test_mm_ternarylogic_epi64
@@ -8565,25 +8539,28 @@ __m128i test_mm_ternarylogic_epi64(__m128i __A, __m128i __B, __m128i __C) {
 }
 TEST_CONSTEXPR(match_v2di(
   _mm_ternarylogic_epi64(
-    ((__m128i)((__v2di){-0x1, 0x0})),
-    ((__m128i)((__v2di){0xB, 0xB})),
-    ((__m128i)((__v2di){0xC, 0xC})),
-    (unsigned char)0xCA),
-  0xB, 0xC));
+    ((__m128i)((__v2di){ (long long)0xBB91433A6AA79987ULL, (long long)0xD1F6F86C029A7245ULL })),
+    ((__m128i)((__v2di){ (long long)0xCD8778E7D340BBCDULL, (long long)0xDAEA58BA4C73A942ULL })),
+    ((__m128i)((__v2di){ (long long)0xEE8971105E503A67ULL,  (long long)0x452EC40A3193CA54ULL })),
+    (unsigned char)0x77),  // F = ~(B & C)
+  (long long)0x337E8FFFADBFC5BAULL,
+  (long long)0xBFD5BFF5FFEC77BFULL));
 TEST_CONSTEXPR(match_v2di(
   _mm_ternarylogic_epi64(
-    ((__m128i)((__v2di){0x9, 0x9})),
-    ((__m128i)((__v2di){0x4, 0x4})),
-    ((__m128i)((__v2di){0x2, 0x2})),
-    (unsigned char)0xFE),
-  0xF, 0xF));
+    ((__m128i)((__v2di){ (long long)0x6FACAA5090E5E945ULL, (long long)0x5F811CB929645F8BULL })),
+    ((__m128i)((__v2di){ (long long)0xDFC9E3B11FCFF454ULL, (long long)0x42D6CB5C6ED4E94BULL })),
+    ((__m128i)((__v2di){ (long long)0xA091250E8FE46024ULL, (long long)0x9C9CEA0C2CA1C789ULL })),
+    (unsigned char)0xDD),  // F = B | ~C
+  (long long)0xDFEFFBF17FDFFFDFULL,
+  (long long)0x63F7DFFFFFDEF97FULL));
 TEST_CONSTEXPR(match_v2di(
   _mm_ternarylogic_epi64(
-    ((__m128i)((__v2di){0x9, 0x9})),
-    ((__m128i)((__v2di){0x4, 0x4})),
-    ((__m128i)((__v2di){0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x0));
+    ((__m128i)((__v2di){ (long long)0x2FD2B7A48D9FE5B9ULL, (long long)0xBCF74D7A5ADAD121ULL })),
+    ((__m128i)((__v2di){ (long long)0xBB9D58E4F543BBCFULL, (long long)0x87F26AEE175F0CD2ULL })),
+    ((__m128i)((__v2di){ (long long)0xBC428D42FA882692ULL, (long long)0x95C5FB986980A81FULL })),
+    (unsigned char)0x22),  // F = ~B & C
+  (long long)0x044285020A880410ULL,
+  (long long)0x100591106880A00DULL));
 
 __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
   // CHECK-LABEL: test_mm_mask_ternarylogic_epi64
@@ -8593,28 +8570,38 @@ __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B,
 }
 TEST_CONSTEXPR(match_v2di(
   _mm_mask_ternarylogic_epi64(
-    ((__m128i)((__v2di){-0x1, 0x0})),
-    (__mmask8)0x33,
-    ((__m128i)((__v2di){0xB, 0xB})),
-    ((__m128i)((__v2di){0xC, 0xC})),
-    (unsigned char)0xCA),
-  0xB, 0xC));
+    ((__m128i)((__v2di){(long long)0xF4C3B00C0D15EA5ELL, (long long)0x0123456789ABCDE0LL})),
+    (__mmask8)0x9D,
+    ((__m128i)((__v2di){(long long)0x9A7F3C2155EE00DDLL, (long long)0xDEADBEEFCAFEBABELL})),
+    ((__m128i)((__v2di){(long long)0x00F0F0F0F0F0F0F0LL, (long long)0x13579BDF2468ACE0LL})),
+    (unsigned char)0xFF), // All 1s
+  (long long)-1,
+  (long long)0x0123456789ABCDE0LL));
 TEST_CONSTEXPR(match_v2di(
   _mm_mask_ternarylogic_epi64(
-    ((__m128i)((__v2di){0x9, 0x9})),
-    (__mmask8)0xCC,
-    ((__m128i)((__v2di){0x4, 0x4})),
-    ((__m128i)((__v2di){0x2, 0x2})),
-    (unsigned char)0xFE),
-  0x9, 0x9));
+    ((__m128i)((__v2di){ (long long)0x3A7C19E54B20D8A1LL, (long long)0x4F12B39D0C85E762LL })),
+    (__mmask8)0xD2,
+    ((__m128i)((__v2di){ (long long)0x6D93A0F217C54E3BLL, (long long)0x24E1C7A95B08D6F2LL })),
+    ((__m128i)((__v2di){ (long long)0x5A0C3E19D472B8F5LL, (long long)0x0187D3B2C9E4056ALL })),
+    (unsigned char)0x00), // All 0s
+  (long long)0x3A7C19E54B20D8A1LL,
+  (long long)0x0LL));
 TEST_CONSTEXPR(match_v2di(
   _mm_mask_ternarylogic_epi64(
-    ((__m128i)((__v2di){0x9, 0x9})),
-    (__mmask8)0x55,
-    ((__m128i)((__v2di){0x4, 0x4})),
-    ((__m128i)((__v2di){0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x9));
+    ((__m128i)((__v2di){
+      (long long)0xA3F10B6C7D8294E1ULL, (long long)0x19D4E7350AB2C98FLL
+    })),
+    (__mmask8)0xB5,
+    ((__m128i)((__v2di){
+      (long long)0x5C2E9A10F4B7D863LL, (long long)0x9B7E1D2C3A4F5E60LL
+    })),
+    ((__m128i)((__v2di){
+      (long long)0x2A6D3F81C9E047B5LL, (long long)0x7F0A1C3E5D2B6490LL
+    })),
+    (unsigned char)0x55), // ~C
+  (long long)0xD592C07E361FB84AULL,
+  (long long)0x19D4E7350AB2C98FLL
+));
 
 __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   // CHECK-LABEL: test_mm_maskz_ternarylogic_epi64
@@ -8624,28 +8611,50 @@ __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B,
 }
 TEST_CONSTEXPR(match_v2di(
   _mm_maskz_ternarylogic_epi64(
-    (__mmask8)0x03,
-    ((__m128i)((__v2di){-0x1, 0x0})),
-    ((__m128i)((__v2di){0xB, 0xB})),
-    ((__m128i)((__v2di){0xC, 0xC})),
-    (unsigned char)0xCA),
-  0xB, 0xC));
+    (__mmask8)0xA9,
+    ((__m128i)((__v2di){
+      (long long)0x8F3A5C7E21D4B690ULL, (long long)0x5AD02CE19B7F46A3ULL
+    })),
+    ((__m128i)((__v2di){
+      (long long)0xC19E04B2A7D35F68ULL, (long long)0x2F7B93C4E1A05D76ULL
+    })),
+    ((__m128i)((__v2di){
+      (long long)0x7A0C1D2E3F405162ULL, (long long)0xD4E5F60718293A4BULL
+    })),
+    (unsigned char)0xD2),  // F = C ? ~(A ^ B) : A
+  (long long)0xB53A457239D4B692ULL,
+  (long long)0x0ULL));
 TEST_CONSTEXPR(match_v2di(
   _mm_maskz_ternarylogic_epi64(
-    (__mmask8)0x0C,
-    ((__m128i)((__v2di){0x9, 0x9})),
-    ((__m128i)((__v2di){0x4, 0x4})),
-    ((__m128i)((__v2di){0x2, 0x2})),
-    (unsigned char)0xFE),
-  0x0, 0x0));
+    (__mmask8)0xB6,
+    ((__m128i)((__v2di){
+      (long long)0x83C1D2E3F4051627ULL, (long long)0x5A0B1C2D3E4F6071ULL
+    })),
+    ((__m128i)((__v2di){
+      (long long)0x9E8D7C6B5A493827ULL, (long long)0x13579BDF2468ACE0ULL
+    })),
+    ((__m128i)((__v2di){
+      (long long)0x02468ACE13579BDFULL, (long long)0xFEDCBA9876543210ULL
+    })),
+    (unsigned char)0xFE),  // F = A | B | C
+  (long long)0x0ULL,
+  (long long)0xFFDFBFFF7E7FFEF1ULL));
 TEST_CONSTEXPR(match_v2di(
   _mm_maskz_ternarylogic_epi64(
-    (__mmask8)0x05,
-    ((__m128i)((__v2di){0x9, 0x9})),
-    ((__m128i)((__v2di){0x4, 0x4})),
-    ((__m128i)((__v2di){0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x0));
+    (__mmask8)0xA5,
+    ((__m128i)((__v2di){
+      (long long)0x1C80317FA3B1799DULL, (long long)0xBDD640FB06671AD1ULL
+    })),
+    ((__m128i)((__v2di){
+      (long long)0x3EB13B9046685257ULL, (long long)0x23B8C1E9392456DEULL
+    })),
+    ((__m128i)((__v2di){
+      (long long)0x1A3D1FA7BC8960A9ULL, (long long)0xBD9C66B3AD3C2D6DULL
+    })),
+    (unsigned char)0x80),  // F = A & B & C
+  (long long)0x1800110000004001ULL,
+  (long long)0x0ULL
+));
 
 __m256i test_mm256_ternarylogic_epi64(__m256i __A, __m256i __B, __m256i __C) {
   // CHECK-LABEL: test_mm256_ternarylogic_epi64
@@ -8657,22 +8666,15 @@ TEST_CONSTEXPR(match_v4di(
     ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})),
     ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})),
     ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA),
-  0xB, 0xC, 0xB, 0xC));
+    (unsigned char)0x94),
+  (long long)-0x8, (long long)0x3, (long long)-0x8, (long long)0x3));
 TEST_CONSTEXPR(match_v4di(
   _mm256_ternarylogic_epi64(
     ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
     ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
     ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
-  0xF, 0xF, 0xF, 0xF));
-TEST_CONSTEXPR(match_v4di(
-  _mm256_ternarylogic_epi64(
-    ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
-    ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
-    ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x0, 0x0, 0x0));
+    (unsigned char)0x76),
+  (long long)0xF, (long long)0xF, (long long)0xF, (long long)0xF));
 
 __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
   // CHECK-LABEL: test_mm256_mask_ternarylogic_epi64
@@ -8682,28 +8684,44 @@ __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __
 }
 TEST_CONSTEXPR(match_v4di(
   _mm256_mask_ternarylogic_epi64(
-    ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})),
-    (__mmask8)0x33,
-    ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})),
-    ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA),
-  0xB, 0xC, -0x1, 0x0));
-TEST_CONSTEXPR(match_v4di(
-  _mm256_mask_ternarylogic_epi64(
-    ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
-    (__mmask8)0xCC,
-    ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
-    ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0xFE),
-  0x9, 0x9, 0xF, 0xF));
+    ((__m256i)((__v4di){
+      (long long)0x0123456789ABCDEFULL, (long long)0x0F0F0F0F0F0F0F0FULL,
+      (long long)0xAAAAAAAAAAAAAAAALL, (long long)0x13579BDF02468ACEULL
+    })),
+    (__mmask8)0x09,
+    ((__m256i)((__v4di){
+      (long long)0x1111111111111111ULL, (long long)0x2222222222222222ULL,
+      (long long)0x3333333333333333ULL, (long long)0x4444444444444444ULL
+    })),
+    ((__m256i)((__v4di){
+      (long long)0x5555555555555555ULL, (long long)0x6666666666666666ULL,
+      (long long)0x7777777777777777ULL, (long long)0x8888888888888888ULL
+    })),
+    (unsigned char)0x12),
+  (long long)0x44660022CCEE88AAULL,
+  (long long)0x0F0F0F0F0F0F0F0FULL,
+  (long long)0xAAAAAAAAAAAAAAAALL,
+  (long long)0x9B9B13138A8A0202ULL));
 TEST_CONSTEXPR(match_v4di(
   _mm256_mask_ternarylogic_epi64(
-    ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
-    (__mmask8)0x55,
-    ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
-    ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x9, 0x0, 0x9));
+    ((__m256i)((__v4di){
+      (long long)0xDEADBEEFDEADBEEFULL, (long long)0xCAFEBABECAFEBABEULL,
+      (long long)0xF00DFACEF00DFACEULL, (long long)0x0123456789ABCDEFULL
+    })),
+    (__mmask8)0x06,
+    ((__m256i)((__v4di){
+      (long long)0x0000000000000000ULL, (long long)0xFFFFFFFFFFFFFFFFULL,
+      (long long)0x13579BDF13579BDFULL, (long long)0x0AAAAAAAAAAAAAAULL
+    })),
+    ((__m256i)((__v4di){
+      (long long)0x1111111111111111ULL, (long long)0x2222222222222222ULL,
+      (long long)0x3333333333333333ULL, (long long)0x4444444444444444ULL
+    })),
+    (unsigned char)0x23),
+  (long long)0xDEADBEEFDEADBEEFULL,
+  (long long)0x0000000000000000ULL,
+  (long long)0x2CA024202CA02420ULL,
+  (long long)0x0123456789ABCDEFULL));
 
 __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   // CHECK-LABEL: test_mm256_maskz_ternarylogic_epi64
@@ -8713,28 +8731,38 @@ __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i _
 }
 TEST_CONSTEXPR(match_v4di(
   _mm256_maskz_ternarylogic_epi64(
-    (__mmask8)0x33,
-    ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})),
-    ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})),
-    ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})),
-    (unsigned char)0xCA),
-  0xB, 0xC, 0x0, 0x0));
-TEST_CONSTEXPR(match_v4di(
-  _mm256_maskz_ternarylogic_epi64(
-    (__mmask8)0xCC,
-    ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
-    ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
-    ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
+    (__mmask8)0x05,
+    ((__m256i)((__v4di){
+      (long long)0x1, (long long)0x2, (long long)0x0, (long long)0x7
+    })),
+    ((__m256i)((__v4di){
+      (long long)0x0, (long long)0x3, (long long)0x4, (long long)0x0
+    })),
+    ((__m256i)((__v4di){
+      (long long)0x0, (long long)0x5, (long long)0x0, (long long)0x1
+    })),
     (unsigned char)0xFE),
-  0x0, 0x0, 0xF, 0xF));
+  (long long)0x1,
+  (long long)0x0,
+  (long long)0x4,
+  (long long)0x0));
 TEST_CONSTEXPR(match_v4di(
   _mm256_maskz_ternarylogic_epi64(
-    (__mmask8)0x55,
-    ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})),
-    ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})),
-    ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})),
-    (unsigned char)0x80),
-  0x0, 0x0, 0x0, 0x0));
+    (__mmask8)0x0A,
+    ((__m256i)((__v4di){
+      (long long)0x1, (long long)0x0, (long long)0x2, (long long)0x1
+    })),
+    ((__m256i)((__v4di){
+      (long long)0x0, (long long)0x1, (long long)0x0, (long long)0x0
+    })),
+    ((__m256i)((__v4di){
+      (long long)0x0, (long long)0x0, (long long)0x4, (long long)0x1
+    })),
+    (unsigned char)0xED),
+  (long long)0x0,
+  (long long)-0x1,
+  (long long)0x0,
+  (long long)-0x1));
 
 __m256 test_mm256_shuffle_f32x4(__m256 __A, __m256 __B) {
   // CHECK-LABEL: test_mm256_shuffle_f32x4

>From 7c7ca5f8b18c53b12166b11d428b4e8066ea3225 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 1 Oct 2025 08:58:47 +0200
Subject: [PATCH 16/48] [IR] Introduce !captures metadata (#160913)

This introduces `!captures` metadata on stores, which looks like this:

```
store ptr %x, ptr %y, !captures !{!"address", !"read_provenance"}
```

The semantics are the same as replacing the store with a call like this:
```
call void @llvm.store(ptr captures(address, read_provenance) %x, ptr %y)
```

This metadata is intended for annotation by frontends -- it's not
something we can feasibly infer at this point, as it would require
analyzing uses of the pointer stored in memory.

The motivating use case for this is Rust's `println!()` machinery, which
involves storing a reference to the value inside a structure. This means
that printing code (including conditional debugging code) can inhibit
optimizations because the pointer escapes. With the new metadata we can
annotate this as a read-only capture, which has less impact on
optimizations.
---
 llvm/docs/LangRef.rst                         |  29 +++
 llvm/include/llvm/IR/FixedMetadataKinds.def   |   1 +
 llvm/include/llvm/IR/Metadata.h               |   8 +
 llvm/lib/Analysis/CaptureTracking.cpp         |   6 +-
 llvm/lib/IR/Metadata.cpp                      |  35 ++++
 llvm/lib/IR/Verifier.cpp                      |  25 +++
 llvm/lib/Transforms/Utils/Local.cpp           |   6 +
 .../Transforms/FunctionAttrs/nocapture.ll     |  68 +++++++
 .../SimplifyCFG/hoist-with-metadata.ll        | 171 ++++++++++++++++++
 llvm/test/Verifier/captures-metadata.ll       |  37 ++++
 10 files changed, 385 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Verifier/captures-metadata.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 8e863939781a2..22b58bf0f5735 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1489,6 +1489,8 @@ Currently, only the following parameter attributes are defined:
     function, returning a pointer to allocated storage disjoint from the
     storage for any other object accessible to the caller.
 
+.. _captures_attr:
+
 ``captures(...)``
     This attribute restricts the ways in which the callee may capture the
     pointer. This is not a valid attribute for return values. This attribute
@@ -7543,6 +7545,33 @@ The number of bytes known to be dereferenceable is specified by the integer
 value in the metadata node. This is analogous to the ''dereferenceable_or_null''
 attribute on parameters and return values.
 
+'``captures``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``!captures`` metadata can only be applied to ``store`` instructions with
+a pointer-typed value operand. It restricts the capturing behavior of the store
+value operand in the same way the ``captures(...)`` attribute would do on a
+call. See the :ref:`pointer capture section <pointercapture>` for a detailed
+discussion of capture semantics.
+
+The ``!captures`` metadata accepts a non-empty list of strings from the same
+set as the :ref:`captures attribute <captures_attr>`:
+``!"address"``, ``!"address_is_null"``, ``!"provenance"`` and
+``!"read_provenance"``. ``!"none"`` is not supported.
+
+For example ``store ptr %x, ptr %y, !captures !{!"address"}`` indicates that
+the copy of pointer ``%x`` stored to location ``%y`` will only be used to
+inspect its integral address value, and not dereferenced. Dereferencing the
+pointer would result in undefined behavior.
+
+Similarly ``store ptr %x, ptr %y, !captures !{!"address", !"read_provenance"}``
+indicates that while reads through the stored pointer are allowed, writes would
+result in undefined behavior.
+
+The ``!captures`` metadata makes no statement about other uses of ``%x``, or
+uses of the stored-to memory location after it has been overwritten with a
+different value.
+
 .. _llvm.loop:
 
 '``llvm.loop``'
diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def
index d09cc15d65ff6..0603abcd6a4da 100644
--- a/llvm/include/llvm/IR/FixedMetadataKinds.def
+++ b/llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -55,3 +55,4 @@ LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40)
 LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41)
 LLVM_FIXED_MD_KIND(MD_callee_type, "callee_type", 42)
 LLVM_FIXED_MD_KIND(MD_nofree, "nofree", 43)
+LLVM_FIXED_MD_KIND(MD_captures, "captures", 44)
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 990bdc618f240..85a7f8fd373c0 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -41,6 +41,7 @@
 
 namespace llvm {
 
+enum class CaptureComponents : uint8_t;
 class Module;
 class ModuleSlotTracker;
 class raw_ostream;
@@ -1480,6 +1481,13 @@ class MDNode : public Metadata {
   LLVM_ABI static MDNode *getMergedCallsiteMetadata(MDNode *A, MDNode *B);
   LLVM_ABI static MDNode *getMergedCalleeTypeMetadata(const MDNode *A,
                                                       const MDNode *B);
+
+  /// Convert !captures metadata to CaptureComponents. MD may be nullptr.
+  LLVM_ABI static CaptureComponents toCaptureComponents(const MDNode *MD);
+  /// Convert CaptureComponents to !captures metadata. The return value may be
+  /// nullptr.
+  LLVM_ABI static MDNode *fromCaptureComponents(LLVMContext &Ctx,
+                                                CaptureComponents CC);
 };
 
 /// Tuple of metadata.
diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp
index a0fe7f9037e47..22229d9c26b3b 100644
--- a/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/llvm/lib/Analysis/CaptureTracking.cpp
@@ -320,8 +320,12 @@ UseCaptureInfo llvm::DetermineUseCaptureKind(const Use &U, const Value *Base) {
     return CaptureComponents::None;
   case Instruction::Store:
     // Stored the pointer - conservatively assume it may be captured.
+    if (U.getOperandNo() == 0)
+      return MDNode::toCaptureComponents(
+          I->getMetadata(LLVMContext::MD_captures));
+
     // Volatile stores make the address observable.
-    if (U.getOperandNo() == 0 || cast<StoreInst>(I)->isVolatile())
+    if (cast<StoreInst>(I)->isVolatile())
       return CaptureComponents::All;
     return CaptureComponents::None;
   case Instruction::AtomicRMW: {
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index 9cfb0ff4d689a..1add0c7930bc9 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ModRef.h"
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
@@ -1435,6 +1436,40 @@ MDNode *MDNode::getMostGenericAlignmentOrDereferenceable(MDNode *A, MDNode *B) {
   return B;
 }
 
+CaptureComponents MDNode::toCaptureComponents(const MDNode *MD) {
+  if (!MD)
+    return CaptureComponents::All;
+
+  CaptureComponents CC = CaptureComponents::None;
+  for (Metadata *Op : MD->operands()) {
+    CaptureComponents Component =
+        StringSwitch<CaptureComponents>(cast<MDString>(Op)->getString())
+            .Case("address", CaptureComponents::Address)
+            .Case("address_is_null", CaptureComponents::AddressIsNull)
+            .Case("provenance", CaptureComponents::Provenance)
+            .Case("read_provenance", CaptureComponents::ReadProvenance);
+    CC |= Component;
+  }
+  return CC;
+}
+
+MDNode *MDNode::fromCaptureComponents(LLVMContext &Ctx, CaptureComponents CC) {
+  assert(!capturesNothing(CC) && "Can't encode captures(none)");
+  if (capturesAll(CC))
+    return nullptr;
+
+  SmallVector<Metadata *> Components;
+  if (capturesAddressIsNullOnly(CC))
+    Components.push_back(MDString::get(Ctx, "address_is_null"));
+  else if (capturesAddress(CC))
+    Components.push_back(MDString::get(Ctx, "address"));
+  if (capturesReadProvenanceOnly(CC))
+    Components.push_back(MDString::get(Ctx, "read_provenance"));
+  else if (capturesFullProvenance(CC))
+    Components.push_back(MDString::get(Ctx, "provenance"));
+  return MDNode::get(Ctx, Components);
+}
+
 //===----------------------------------------------------------------------===//
 // NamedMDNode implementation.
 //
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 8c03d6f809d50..6b3cd27b77a7a 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -542,6 +542,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
   void visitAliasScopeMetadata(const MDNode *MD);
   void visitAliasScopeListMetadata(const MDNode *MD);
   void visitAccessGroupMetadata(const MDNode *MD);
+  void visitCapturesMetadata(Instruction &I, const MDNode *Captures);
 
   template <class Ty> bool isValidMetadataArray(const MDTuple &N);
 #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
@@ -5373,6 +5374,27 @@ void Verifier::visitAccessGroupMetadata(const MDNode *MD) {
   }
 }
 
+void Verifier::visitCapturesMetadata(Instruction &I, const MDNode *Captures) {
+  static const char *ValidArgs[] = {"address_is_null", "address",
+                                    "read_provenance", "provenance"};
+
+  auto *SI = dyn_cast<StoreInst>(&I);
+  Check(SI, "!captures metadata can only be applied to store instructions", &I);
+  Check(SI->getValueOperand()->getType()->isPointerTy(),
+        "!captures metadata can only be applied to store with value operand of "
+        "pointer type",
+        &I);
+  Check(Captures->getNumOperands() != 0, "!captures metadata cannot be empty",
+        &I);
+
+  for (Metadata *Op : Captures->operands()) {
+    auto *Str = dyn_cast<MDString>(Op);
+    Check(Str, "!captures metadata must be a list of strings", &I);
+    Check(is_contained(ValidArgs, Str->getString()),
+          "invalid entry in !captures metadata", &I, Str);
+  }
+}
+
 /// verifyInstruction - Verify that an instruction is well formed.
 ///
 void Verifier::visitInstruction(Instruction &I) {
@@ -5600,6 +5622,9 @@ void Verifier::visitInstruction(Instruction &I) {
   if (MDNode *Annotation = I.getMetadata(LLVMContext::MD_annotation))
     visitAnnotationMetadata(Annotation);
 
+  if (MDNode *Captures = I.getMetadata(LLVMContext::MD_captures))
+    visitCapturesMetadata(I, Captures);
+
   if (MDNode *N = I.getDebugLoc().getAsMDNode()) {
     CheckDI(isa<DILocation>(N), "invalid !dbg metadata attachment", &I, N);
     visitMDNode(*N, AreDebugLocsAllowed::Yes);
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 123881e276584..21b2652d04120 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3025,6 +3025,12 @@ static void combineMetadata(Instruction *K, const Instruction *J,
         // Preserve !nosanitize if both K and J have it.
         K->setMetadata(Kind, JMD);
         break;
+      case LLVMContext::MD_captures:
+        K->setMetadata(
+            Kind, MDNode::fromCaptureComponents(
+                      K->getContext(), MDNode::toCaptureComponents(JMD) |
+                                           MDNode::toCaptureComponents(KMD)));
+        break;
       }
   }
   // Set !invariant.group from J if J has it. If both instructions have it
diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll
index 60a4214548a72..8113ba65fe422 100644
--- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll
@@ -1398,5 +1398,73 @@ define void @assume_nonnull(ptr %p) {
   ret void
 }
 
+define void @captures_metadata_address_is_null(ptr %x, ptr %y) {
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
+; FNATTRS-LABEL: define void @captures_metadata_address_is_null
+; FNATTRS-SAME: (ptr captures(address_is_null) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] {
+; FNATTRS-NEXT:    store ptr [[X]], ptr [[Y]], align 8, !captures [[META0:![0-9]+]]
+; FNATTRS-NEXT:    ret void
+;
+; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
+; ATTRIBUTOR-LABEL: define void @captures_metadata_address_is_null
+; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] {
+; ATTRIBUTOR-NEXT:    store ptr [[X]], ptr [[Y]], align 8, !captures [[META0:![0-9]+]]
+; ATTRIBUTOR-NEXT:    ret void
+;
+  store ptr %x, ptr %y, !captures !{!"address_is_null"}
+  ret void
+}
+
+define void @captures_metadata_address(ptr %x, ptr %y) {
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
+; FNATTRS-LABEL: define void @captures_metadata_address
+; FNATTRS-SAME: (ptr captures(address) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] {
+; FNATTRS-NEXT:    store ptr [[X]], ptr [[Y]], align 8, !captures [[META1:![0-9]+]]
+; FNATTRS-NEXT:    ret void
+;
+; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
+; ATTRIBUTOR-LABEL: define void @captures_metadata_address
+; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] {
+; ATTRIBUTOR-NEXT:    store ptr [[X]], ptr [[Y]], align 8, !captures [[META1:![0-9]+]]
+; ATTRIBUTOR-NEXT:    ret void
+;
+  store ptr %x, ptr %y, !captures !{!"address"}
+  ret void
+}
+
+define void @captures_metadata_address_read_provenance(ptr %x, ptr %y) {
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
+; FNATTRS-LABEL: define void @captures_metadata_address_read_provenance
+; FNATTRS-SAME: (ptr captures(address, read_provenance) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] {
+; FNATTRS-NEXT:    store ptr [[X]], ptr [[Y]], align 8, !captures [[META2:![0-9]+]]
+; FNATTRS-NEXT:    ret void
+;
+; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
+; ATTRIBUTOR-LABEL: define void @captures_metadata_address_read_provenance
+; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] {
+; ATTRIBUTOR-NEXT:    store ptr [[X]], ptr [[Y]], align 8, !captures [[META2:![0-9]+]]
+; ATTRIBUTOR-NEXT:    ret void
+;
+  store ptr %x, ptr %y, !captures !{!"address", !"read_provenance"}
+  ret void
+}
+
+define void @captures_metadata_provenance(ptr %x, ptr %y) {
+; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
+; FNATTRS-LABEL: define void @captures_metadata_provenance
+; FNATTRS-SAME: (ptr captures(provenance) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] {
+; FNATTRS-NEXT:    store ptr [[X]], ptr [[Y]], align 8, !captures [[META3:![0-9]+]]
+; FNATTRS-NEXT:    ret void
+;
+; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
+; ATTRIBUTOR-LABEL: define void @captures_metadata_provenance
+; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] {
+; ATTRIBUTOR-NEXT:    store ptr [[X]], ptr [[Y]], align 8, !captures [[META3:![0-9]+]]
+; ATTRIBUTOR-NEXT:    ret void
+;
+  store ptr %x, ptr %y, !captures !{!"provenance"}
+  ret void
+}
+
 declare ptr @llvm.launder.invariant.group.p0(ptr)
 declare ptr @llvm.strip.invariant.group.p0(ptr)
diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll
index d34ac2bb30040..85c8ed20210b8 100644
--- a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll
+++ b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll
@@ -424,6 +424,174 @@ join:
   ret ptr %phi
 }
 
+define void @hoist_captures_same(i1 %c, ptr %x, ptr %y) {
+; CHECK-LABEL: @hoist_captures_same(
+; CHECK-NEXT:  if:
+; CHECK-NEXT:    store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META9:![0-9]+]]
+; CHECK-NEXT:    ret void
+;
+if:
+  br i1 %c, label %then, label %else
+
+then:
+  store ptr %x, ptr %y, !captures !{!"address"}
+  br label %out
+
+else:
+  store ptr %x, ptr %y, !captures !{!"address"}
+  br label %out
+
+out:
+  ret void
+}
+
+define void @hoist_captures_different(i1 %c, ptr %x, ptr %y) {
+; CHECK-LABEL: @hoist_captures_different(
+; CHECK-NEXT:  if:
+; CHECK-NEXT:    store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META10:![0-9]+]]
+; CHECK-NEXT:    ret void
+;
+if:
+  br i1 %c, label %then, label %else
+
+then:
+  store ptr %x, ptr %y, !captures !{!"address"}
+  br label %out
+
+else:
+  store ptr %x, ptr %y, !captures !{!"read_provenance"}
+  br label %out
+
+out:
+  ret void
+}
+
+define void @hoist_captures_overlap(i1 %c, ptr %x, ptr %y) {
+; CHECK-LABEL: @hoist_captures_overlap(
+; CHECK-NEXT:  if:
+; CHECK-NEXT:    store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META10]]
+; CHECK-NEXT:    ret void
+;
+if:
+  br i1 %c, label %then, label %else
+
+then:
+  store ptr %x, ptr %y, !captures !{!"address"}
+  br label %out
+
+else:
+  store ptr %x, ptr %y, !captures !{!"address", !"read_provenance"}
+  br label %out
+
+out:
+  ret void
+}
+
+define void @hoist_captures_subsume1(i1 %c, ptr %x, ptr %y) {
+; CHECK-LABEL: @hoist_captures_subsume1(
+; CHECK-NEXT:  if:
+; CHECK-NEXT:    store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META9]]
+; CHECK-NEXT:    ret void
+;
+if:
+  br i1 %c, label %then, label %else
+
+then:
+  store ptr %x, ptr %y, !captures !{!"address_is_null"}
+  br label %out
+
+else:
+  store ptr %x, ptr %y, !captures !{!"address"}
+  br label %out
+
+out:
+  ret void
+}
+
+define void @hoist_captures_subsume2(i1 %c, ptr %x, ptr %y) {
+; CHECK-LABEL: @hoist_captures_subsume2(
+; CHECK-NEXT:  if:
+; CHECK-NEXT:    store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META11:![0-9]+]]
+; CHECK-NEXT:    ret void
+;
+if:
+  br i1 %c, label %then, label %else
+
+then:
+  store ptr %x, ptr %y, !captures !{!"provenance"}
+  br label %out
+
+else:
+  store ptr %x, ptr %y, !captures !{!"read_provenance"}
+  br label %out
+
+out:
+  ret void
+}
+
+define void @hoist_captures_full_set(i1 %c, ptr %x, ptr %y) {
+; CHECK-LABEL: @hoist_captures_full_set(
+; CHECK-NEXT:  if:
+; CHECK-NEXT:    store ptr [[X:%.*]], ptr [[Y:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
+if:
+  br i1 %c, label %then, label %else
+
+then:
+  store ptr %x, ptr %y, !captures !{!"address"}
+  br label %out
+
+else:
+  store ptr %x, ptr %y, !captures !{!"provenance"}
+  br label %out
+
+out:
+  ret void
+}
+
+define void @hoist_captures_only_one1(i1 %c, ptr %x, ptr %y) {
+; CHECK-LABEL: @hoist_captures_only_one1(
+; CHECK-NEXT:  if:
+; CHECK-NEXT:    store ptr [[X:%.*]], ptr [[Y:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
+if:
+  br i1 %c, label %then, label %else
+
+then:
+  store ptr %x, ptr %y, !captures !{!"address"}
+  br label %out
+
+else:
+  store ptr %x, ptr %y
+  br label %out
+
+out:
+  ret void
+}
+
+define void @hoist_captures_only_one2(i1 %c, ptr %x, ptr %y) {
+; CHECK-LABEL: @hoist_captures_only_one2(
+; CHECK-NEXT:  if:
+; CHECK-NEXT:    store ptr [[X:%.*]], ptr [[Y:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
+if:
+  br i1 %c, label %then, label %else
+
+then:
+  store ptr %x, ptr %y
+  br label %out
+
+else:
+  store ptr %x, ptr %y, !captures !{!"address"}
+  br label %out
+
+out:
+  ret void
+}
+
 !0 = !{ i8 0, i8 1 }
 !1 = !{ i8 3, i8 5 }
 !2 = !{}
@@ -445,4 +613,7 @@ join:
 ; CHECK: [[META6]] = !{float 2.500000e+00}
 ; CHECK: [[META7]] = !{i32 5, i32 6}
 ; CHECK: [[META8]] = !{i32 4, i32 5}
+; CHECK: [[META9]] = !{!"address"}
+; CHECK: [[META10]] = !{!"address", !"read_provenance"}
+; CHECK: [[META11]] = !{!"provenance"}
 ;.
diff --git a/llvm/test/Verifier/captures-metadata.ll b/llvm/test/Verifier/captures-metadata.ll
new file mode 100644
index 0000000000000..ae08ddd036f16
--- /dev/null
+++ b/llvm/test/Verifier/captures-metadata.ll
@@ -0,0 +1,37 @@
+; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
+
+; CHECK: !captures metadata can only be applied to store instructions
+define void @wrong_instr_type(ptr %x) {
+  load ptr, ptr %x, !captures !{!"address"}
+  ret void
+}
+
+; CHECK: captures metadata can only be applied to store with value operand of pointer type
+define void @wrong_op_type(i32 %x, ptr %y) {
+  store i32 %x, ptr %y, !captures !{!"address"}
+  ret void
+}
+
+; CHECK: !captures metadata cannot be empty
+define void @empty(ptr %x, ptr %y) {
+  store ptr %x, ptr %y, !captures !{}
+  ret void
+}
+
+; CHECK: !captures metadata must be a list of strings
+define void @not_string(ptr %x, ptr %y) {
+  store ptr %x, ptr %y, !captures !{!{}}
+  ret void
+}
+
+; CHECK: invalid entry in !captures metadata
+define void @invalid_str(ptr %x, ptr %y) {
+  store ptr %x, ptr %y, !captures !{!"foo"}
+  ret void
+}
+
+; CHECK: invalid entry in !captures metadata
+define void @invalid_none(ptr %x, ptr %y) {
+  store ptr %x, ptr %y, !captures !{!"none"}
+  ret void
+}

>From 154b805be366fd4648b3bc44253d6e72e70ba147 Mon Sep 17 00:00:00 2001
From: quic_hchandel <hchandel at qti.qualcomm.com>
Date: Wed, 1 Oct 2025 12:43:32 +0530
Subject: [PATCH 17/48] [RISCV] Add commutative support for Qualcomm uC Xqcics
 extension (#161328)

This is a follow-up to #160653 doing similar changes for Xqcics.
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp    |  10 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td |   4 +-
 llvm/test/CodeGen/RISCV/xqcics.ll           | 124 ++++++++++++++++++++
 3 files changed, 136 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 70b6c7ea35f82..1e6b04f8a4281 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3793,6 +3793,11 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
       return false;
     // Operands 1 and 2 are commutable, if we switch the opcode.
     return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
+  case RISCV::QC_SELECTIEQ:
+  case RISCV::QC_SELECTINE:
+  case RISCV::QC_SELECTIIEQ:
+  case RISCV::QC_SELECTIINE:
+    return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
   case RISCV::QC_MVEQ:
   case RISCV::QC_MVNE:
   case RISCV::QC_MVLT:
@@ -4018,6 +4023,11 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
                                                    OpIdx2);
   }
+  case RISCV::QC_SELECTIEQ:
+  case RISCV::QC_SELECTINE:
+  case RISCV::QC_SELECTIIEQ:
+  case RISCV::QC_SELECTIINE:
+    return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
   case RISCV::QC_MVEQ:
   case RISCV::QC_MVNE:
   case RISCV::QC_MVLT:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index ff4a0406799b1..540786851e2d5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -524,7 +524,7 @@ class QCIRVInstRI<bits<1> funct1, DAGOperand InTyImm11,
   let Inst{30-20} = imm11;
 }
 
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in
 class QCISELECTIICC<bits<3> funct3, string opcodestr>
     : RVInstR4<0b00, funct3, OPC_CUSTOM_2, (outs GPRNoX0:$rd_wb),
                (ins GPRNoX0:$rd, GPRNoX0:$rs1, simm5:$simm1, simm5:$simm2),
@@ -537,7 +537,7 @@ class QCISELECTIICC<bits<3> funct3, string opcodestr>
   let rs2 = simm1;
 }
 
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in
 class QCISELECTICC<bits<3> funct3, string opcodestr>
     : RVInstR4<0b01, funct3, OPC_CUSTOM_2, (outs GPRNoX0:$rd_wb),
                (ins GPRNoX0:$rd, GPRNoX0:$rs1, GPRNoX0:$rs2, simm5:$simm2),
diff --git a/llvm/test/CodeGen/RISCV/xqcics.ll b/llvm/test/CodeGen/RISCV/xqcics.ll
index 5b7ca9e7fedb8..60fc98c5de663 100644
--- a/llvm/test/CodeGen/RISCV/xqcics.ll
+++ b/llvm/test/CodeGen/RISCV/xqcics.ll
@@ -690,3 +690,127 @@ entry:
   ret i32 %sel
 }
 
+define i32 @select_cc_example_eq1(i32 %a, i32 %b, i32 %x, i32 %y) {
+; RV32I-LABEL: select_cc_example_eq1:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    beq a1, a0, .LBB21_2
+; RV32I-NEXT:  # %bb.1: # %entry
+; RV32I-NEXT:    li a2, 11
+; RV32I-NEXT:  .LBB21_2: # %entry
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32IXQCICS-LABEL: select_cc_example_eq1:
+; RV32IXQCICS:       # %bb.0: # %entry
+; RV32IXQCICS-NEXT:    qc.selectieq a0, a1, a2, 11
+; RV32IXQCICS-NEXT:    ret
+;
+; RV32IXQCICM-LABEL: select_cc_example_eq1:
+; RV32IXQCICM:       # %bb.0: # %entry
+; RV32IXQCICM-NEXT:    qc.selectieq a0, a1, a2, 11
+; RV32IXQCICM-NEXT:    ret
+;
+; RV32IXQCI-LABEL: select_cc_example_eq1:
+; RV32IXQCI:       # %bb.0: # %entry
+; RV32IXQCI-NEXT:    qc.line a2, a1, a0, 11
+; RV32IXQCI-NEXT:    mv a0, a2
+; RV32IXQCI-NEXT:    ret
+entry:
+  %cmp = icmp eq i32 %b, %a
+  %sel = select i1 %cmp, i32 %x, i32 11
+  ret i32 %sel
+}
+
+define i32 @select_cc_example_ne1(i32 %a, i32 %b, i32 %x, i32 %y) {
+; RV32I-LABEL: select_cc_example_ne1:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    bne a1, a0, .LBB22_2
+; RV32I-NEXT:  # %bb.1: # %entry
+; RV32I-NEXT:    li a2, 11
+; RV32I-NEXT:  .LBB22_2: # %entry
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32IXQCICS-LABEL: select_cc_example_ne1:
+; RV32IXQCICS:       # %bb.0: # %entry
+; RV32IXQCICS-NEXT:    qc.selectine a0, a1, a2, 11
+; RV32IXQCICS-NEXT:    ret
+;
+; RV32IXQCICM-LABEL: select_cc_example_ne1:
+; RV32IXQCICM:       # %bb.0: # %entry
+; RV32IXQCICM-NEXT:    qc.selectine a0, a1, a2, 11
+; RV32IXQCICM-NEXT:    ret
+;
+; RV32IXQCI-LABEL: select_cc_example_ne1:
+; RV32IXQCI:       # %bb.0: # %entry
+; RV32IXQCI-NEXT:    qc.lieq a2, a1, a0, 11
+; RV32IXQCI-NEXT:    mv a0, a2
+; RV32IXQCI-NEXT:    ret
+entry:
+  %cmp = icmp ne i32 %b, %a
+  %sel = select i1 %cmp, i32 %x, i32 11
+  ret i32 %sel
+}
+
+
+define i32 @select_cc_example_eq2(i32 %a, i32 %b, i32 %x, i32 %y) {
+; RV32I-LABEL: select_cc_example_eq2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    beq a1, a0, .LBB23_2
+; RV32I-NEXT:  # %bb.1: # %entry
+; RV32I-NEXT:    li a0, 11
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB23_2:
+; RV32I-NEXT:    li a0, 15
+; RV32I-NEXT:    ret
+;
+; RV32IXQCICS-LABEL: select_cc_example_eq2:
+; RV32IXQCICS:       # %bb.0: # %entry
+; RV32IXQCICS-NEXT:    qc.selectiieq a0, a1, 15, 11
+; RV32IXQCICS-NEXT:    ret
+;
+; RV32IXQCICM-LABEL: select_cc_example_eq2:
+; RV32IXQCICM:       # %bb.0: # %entry
+; RV32IXQCICM-NEXT:    qc.selectiieq a0, a1, 15, 11
+; RV32IXQCICM-NEXT:    ret
+;
+; RV32IXQCI-LABEL: select_cc_example_eq2:
+; RV32IXQCI:       # %bb.0: # %entry
+; RV32IXQCI-NEXT:    qc.selectiieq a0, a1, 15, 11
+; RV32IXQCI-NEXT:    ret
+entry:
+  %cmp = icmp eq i32 %b, %a
+  %sel = select i1 %cmp, i32 15, i32 11
+  ret i32 %sel
+}
+
+define i32 @select_cc_example_ne2(i32 %a, i32 %b, i32 %x, i32 %y) {
+; RV32I-LABEL: select_cc_example_ne2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    bne a1, a0, .LBB24_2
+; RV32I-NEXT:  # %bb.1: # %entry
+; RV32I-NEXT:    li a0, 11
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB24_2:
+; RV32I-NEXT:    li a0, 15
+; RV32I-NEXT:    ret
+;
+; RV32IXQCICS-LABEL: select_cc_example_ne2:
+; RV32IXQCICS:       # %bb.0: # %entry
+; RV32IXQCICS-NEXT:    qc.selectiine a0, a1, 15, 11
+; RV32IXQCICS-NEXT:    ret
+;
+; RV32IXQCICM-LABEL: select_cc_example_ne2:
+; RV32IXQCICM:       # %bb.0: # %entry
+; RV32IXQCICM-NEXT:    qc.selectiine a0, a1, 15, 11
+; RV32IXQCICM-NEXT:    ret
+;
+; RV32IXQCI-LABEL: select_cc_example_ne2:
+; RV32IXQCI:       # %bb.0: # %entry
+; RV32IXQCI-NEXT:    qc.selectiine a0, a1, 15, 11
+; RV32IXQCI-NEXT:    ret
+entry:
+  %cmp = icmp ne i32 %b, %a
+  %sel = select i1 %cmp, i32 15, i32 11
+  ret i32 %sel
+}

>From ed63a58a567b55c72bf3bd17931c24f55bf3ca92 Mon Sep 17 00:00:00 2001
From: jeanPerier <jperier at nvidia.com>
Date: Wed, 1 Oct 2025 09:21:17 +0200
Subject: [PATCH 18/48] [flang] add helper to create descriptor with new base
 address (#161347)

There is currently no helper to create a descriptor for a copy of a
Fortran entity based on the descriptor of the original entity and the
base address of the copy (most places that are doing this currently are
also doing allocation of the copy at the same time or using the
runtime).
Add a helper for this with a unit test.
---
 .../flang/Optimizer/Builder/FIRBuilder.h      |  9 ++++
 flang/lib/Optimizer/Builder/FIRBuilder.cpp    | 22 +++++++++
 .../Optimizer/Builder/FIRBuilderTest.cpp      | 47 +++++++++++++++++++
 3 files changed, 78 insertions(+)

diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index 4b3087ed45788..d3af3bafbf279 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -959,6 +959,15 @@ mlir::Value genLifetimeStart(mlir::OpBuilder &builder, mlir::Location loc,
 void genLifetimeEnd(mlir::OpBuilder &builder, mlir::Location loc,
                     mlir::Value mem);
 
+/// Given a fir.box or fir.class \p box describing an entity and a raw address
+/// \p newAddr for an entity with the same Fortran properties (rank, dynamic
+/// type, length parameters and bounds) and attributes (POINTER or ALLOCATABLE),
+/// create a box for \p newAddr with the same type as \p box. This assumes \p
+/// newAddr is for contiguous storage (\p box does not have to be contiguous).
+mlir::Value getDescriptorWithNewBaseAddress(fir::FirOpBuilder &builder,
+                                            mlir::Location loc, mlir::Value box,
+                                            mlir::Value newAddr);
+
 } // namespace fir::factory
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index b6501fd530992..5e6e20861fd85 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1974,3 +1974,25 @@ void fir::factory::genLifetimeEnd(mlir::OpBuilder &builder, mlir::Location loc,
                                   mlir::Value cast) {
   mlir::LLVM::LifetimeEndOp::create(builder, loc, cast);
 }
+
+mlir::Value fir::factory::getDescriptorWithNewBaseAddress(
+    fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value box,
+    mlir::Value newAddr) {
+  auto boxType = llvm::dyn_cast<fir::BaseBoxType>(box.getType());
+  assert(boxType &&
+         "expected a box type input in getDescriptorWithNewBaseAddress");
+  if (boxType.isAssumedRank())
+    TODO(loc, "changing descriptor base address for an assumed rank entity");
+  llvm::SmallVector<mlir::Value> lbounds;
+  fir::factory::genDimInfoFromBox(builder, loc, box, &lbounds,
+                                  /*extents=*/nullptr, /*strides=*/nullptr);
+  fir::BoxValue inputBoxValue(box, lbounds, /*explicitParams=*/{});
+  fir::ExtendedValue openedInput =
+      fir::factory::readBoxValue(builder, loc, inputBoxValue);
+  mlir::Value shape = fir::isArray(openedInput)
+                          ? builder.createShape(loc, openedInput)
+                          : mlir::Value{};
+  mlir::Value typeMold = fir::isPolymorphicType(boxType) ? box : mlir::Value{};
+  return builder.createBox(loc, boxType, newAddr, shape, /*slice=*/{},
+                           fir::getTypeParams(openedInput), typeMold);
+}
diff --git a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
index e3e364720af67..fffd4ab5446ca 100644
--- a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
+++ b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
@@ -644,3 +644,50 @@ TEST_F(FIRBuilderTest, genArithIntegerOverflow) {
   auto op4_ioff = op4_iofi.getOverflowAttr().getValue();
   EXPECT_EQ(op4_ioff, nsw);
 }
+
+TEST_F(FIRBuilderTest, getDescriptorWithNewBaseAddress) {
+  auto builder = getBuilder();
+  auto loc = builder.getUnknownLoc();
+
+  // Build an input fir.box for a 1-D array of i64 with constant extent 10.
+  auto i64Ty = builder.getI64Type();
+  auto seqTy = fir::SequenceType::get({10}, i64Ty);
+  auto refArrTy = fir::ReferenceType::get(seqTy);
+  auto ptrTy = fir::PointerType::get(seqTy);
+  auto boxTy = fir::BoxType::get(ptrTy);
+  // Create an undef box descriptor value (descriptor contents are unspecified).
+  mlir::Value inputBox = fir::UndefOp::create(builder, loc, boxTy);
+
+  // New base address (same element type and properties).
+  mlir::Value addr2 = fir::UndefOp::create(builder, loc, refArrTy);
+
+  mlir::Value newBox = fir::factory::getDescriptorWithNewBaseAddress(
+      builder, loc, inputBox, addr2);
+
+  // The returned descriptor must have the same type as the input box.
+  EXPECT_EQ(newBox.getType(), inputBox.getType());
+
+  // It must be constructed by an embox using the new base address.
+  ASSERT_TRUE(llvm::isa_and_nonnull<fir::EmboxOp>(newBox.getDefiningOp()));
+  auto embox = llvm::dyn_cast<fir::EmboxOp>(newBox.getDefiningOp());
+  EXPECT_EQ(embox.getMemref(), addr2);
+
+  // The shape should be derived from the input box; expect a fir.shape with one
+  // extent that comes from a fir.box_dims reading from the original input box.
+  mlir::Value shape = embox.getShape();
+  ASSERT_TRUE(shape);
+  ASSERT_TRUE(llvm::isa_and_nonnull<fir::ShapeShiftOp>(shape.getDefiningOp()));
+  auto shapeOp = llvm::dyn_cast<fir::ShapeShiftOp>(shape.getDefiningOp());
+  ASSERT_EQ(shapeOp.getExtents().size(), 1u);
+  mlir::Value extent0 = shapeOp.getExtents()[0];
+  ASSERT_TRUE(llvm::isa_and_nonnull<fir::BoxDimsOp>(extent0.getDefiningOp()));
+  auto dimOp = llvm::dyn_cast<fir::BoxDimsOp>(extent0.getDefiningOp());
+  EXPECT_EQ(dimOp.getVal(), inputBox);
+
+  // Also verify the origin comes from a BoxDims on the same input box.
+  ASSERT_EQ(shapeOp.getOrigins().size(), 1u);
+  mlir::Value origin0 = shapeOp.getOrigins()[0];
+  ASSERT_TRUE(llvm::isa_and_nonnull<fir::BoxDimsOp>(origin0.getDefiningOp()));
+  auto lbOp = llvm::dyn_cast<fir::BoxDimsOp>(origin0.getDefiningOp());
+  EXPECT_EQ(lbOp.getVal(), inputBox);
+}

>From f0cf1dde81c8cbe8f7c7fa386d379c073a999bba Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Wed, 1 Oct 2025 08:37:15 +0100
Subject: [PATCH 19/48] [lldb][IRExecutionUnit] Return error on failure to
 resolve function address (#161363)

Starting with https://github.com/llvm/llvm-project/pull/148877 we
started encoding the module ID of the function DIE we are currently
parsing into its `AsmLabel` in the AST. When the JIT asks LLDB to
resolve our special mangled name, we would locate the module and resolve
the function/symbol we found in it.

If we are debugging with a `SymbolFileDWARFDebugMap`, the module ID we
encode is that of the `.o` file that is tracked by the debug-map. To
resolve the address of the DIE in that `.o` file, we have to ask
`SymbolFileDWARFDebugMap::LinkOSOAddress` to turn the address of the
`.o` DIE into a real address in the linked executable. This will only
work if the `.o` address was actually tracked by the debug-map. However,
if the function definition appears in multiple `.o` files (which is the
case for functions defined in headers), the linker will most likely
de-duplicate that definition. So most `.o`'s definition DIEs for that
function won't have a contribution in the debug-map, and thus we fail to
resolve the address.

When debugging Clang on Darwin, e.g., you'd see:
```
(lldb) expr CXXDecl->getName()

error: Couldn't look up symbols:
  $__lldb_func::0x1:0x4000d000002359da:_ZNK5clang9NamedDecl7getNameEv
Hint: The expression tried to call a function that is not present in the target, perhaps because it was optimized out by the compiler.
```
unless you were stopped in the `.o` file whose definition of `getName`
made it into the final executable.

The fix here is to error out if we fail to resolve the address, causing
us to fall back on the old flow which did a lookup by mangled name,
which the `SymbolFileDWARFDebugMap` will handle correctly.

An alternative fix to this would be to encode the
`SymbolFileDWARFDebugMap`'s module-id and implement
`SymbolFileDWARFDebugMap::ResolveFunctionCallLabel` by doing a mangled
name lookup. The proposed approach doesn't stop us from implementing
that, so we could choose to do it in a follow-up.

rdar://161393045
---
 lldb/source/Expression/IRExecutionUnit.cpp    |  7 ++++-
 .../function-call-from-object-file/Makefile   |  3 ++
 .../TestFunctionCallFromObjectFile.py         | 29 +++++++++++++++++++
 .../function-call-from-object-file/common.h   |  8 +++++
 .../function-call-from-object-file/lib1.cpp   |  8 +++++
 .../function-call-from-object-file/lib2.cpp   |  6 ++++
 .../function-call-from-object-file/main.cpp   | 10 +++++++
 7 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 lldb/test/API/lang/cpp/function-call-from-object-file/Makefile
 create mode 100644 lldb/test/API/lang/cpp/function-call-from-object-file/TestFunctionCallFromObjectFile.py
 create mode 100644 lldb/test/API/lang/cpp/function-call-from-object-file/common.h
 create mode 100644 lldb/test/API/lang/cpp/function-call-from-object-file/lib1.cpp
 create mode 100644 lldb/test/API/lang/cpp/function-call-from-object-file/lib2.cpp
 create mode 100644 lldb/test/API/lang/cpp/function-call-from-object-file/main.cpp

diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp
index 25d4a87b89ef2..60b9de0d21b2e 100644
--- a/lldb/source/Expression/IRExecutionUnit.cpp
+++ b/lldb/source/Expression/IRExecutionUnit.cpp
@@ -751,7 +751,12 @@ ResolveFunctionCallLabel(FunctionCallLabel &label,
   sc_list.Append(*sc_or_err);
 
   LoadAddressResolver resolver(*sc.target_sp, symbol_was_missing_weak);
-  return resolver.Resolve(sc_list).value_or(LLDB_INVALID_ADDRESS);
+  lldb::addr_t resolved_addr =
+      resolver.Resolve(sc_list).value_or(LLDB_INVALID_ADDRESS);
+  if (resolved_addr == LLDB_INVALID_ADDRESS)
+    return llvm::createStringError("couldn't resolve address for function");
+
+  return resolved_addr;
 }
 
 lldb::addr_t
diff --git a/lldb/test/API/lang/cpp/function-call-from-object-file/Makefile b/lldb/test/API/lang/cpp/function-call-from-object-file/Makefile
new file mode 100644
index 0000000000000..285bbfbbca4fe
--- /dev/null
+++ b/lldb/test/API/lang/cpp/function-call-from-object-file/Makefile
@@ -0,0 +1,3 @@
+CXX_SOURCES := main.cpp lib1.cpp lib2.cpp
+
+include Makefile.rules
diff --git a/lldb/test/API/lang/cpp/function-call-from-object-file/TestFunctionCallFromObjectFile.py b/lldb/test/API/lang/cpp/function-call-from-object-file/TestFunctionCallFromObjectFile.py
new file mode 100644
index 0000000000000..f0a7aef182a67
--- /dev/null
+++ b/lldb/test/API/lang/cpp/function-call-from-object-file/TestFunctionCallFromObjectFile.py
@@ -0,0 +1,29 @@
+"""
+Tests that we can call functions that have definitions in multiple
+CUs in the debug-info (which is the case for functions defined in headers).
+The linker will most likely de-duplicate the function definitions when linking
+the final executable. On Darwin, this will create a debug-map that LLDB will use
+to fix up object file addresses to addresses in the linked executable. However,
+if we parsed the DIE from the object file whose function definition got stripped
+by the linker, LLDB needs to ensure it can still resolve the function symbol it
+got for it.
+"""
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class TestFunctionCallFromObjectFile(TestBase):
+    def test_lib1(self):
+        self.build()
+        lldbutil.run_to_name_breakpoint(self, "lib1_func")
+
+        self.expect_expr("Foo{}.foo()", result_type="int", result_value="15")
+
+    def test_lib2(self):
+        self.build()
+        lldbutil.run_to_name_breakpoint(self, "lib2_func")
+
+        self.expect_expr("Foo{}.foo()", result_type="int", result_value="15")
diff --git a/lldb/test/API/lang/cpp/function-call-from-object-file/common.h b/lldb/test/API/lang/cpp/function-call-from-object-file/common.h
new file mode 100644
index 0000000000000..76e23be6b97a6
--- /dev/null
+++ b/lldb/test/API/lang/cpp/function-call-from-object-file/common.h
@@ -0,0 +1,8 @@
+#ifndef COMMON_H_IN
+#define COMMON_H_IN
+
+struct Foo {
+  int foo() { return 15; }
+};
+
+#endif // COMMON_H_IN
diff --git a/lldb/test/API/lang/cpp/function-call-from-object-file/lib1.cpp b/lldb/test/API/lang/cpp/function-call-from-object-file/lib1.cpp
new file mode 100644
index 0000000000000..b97bcc1b712b6
--- /dev/null
+++ b/lldb/test/API/lang/cpp/function-call-from-object-file/lib1.cpp
@@ -0,0 +1,8 @@
+#include "common.h"
+
+// Parameter "Foo*" forces LLDB to parse "Foo" from the object
+// file that it is stopped in.
+void lib1_func(Foo *) {
+  // Force definition into lib1.o debug-info.
+  Foo{}.foo();
+}
diff --git a/lldb/test/API/lang/cpp/function-call-from-object-file/lib2.cpp b/lldb/test/API/lang/cpp/function-call-from-object-file/lib2.cpp
new file mode 100644
index 0000000000000..2f9d81a8bdf4c
--- /dev/null
+++ b/lldb/test/API/lang/cpp/function-call-from-object-file/lib2.cpp
@@ -0,0 +1,6 @@
+#include "common.h"
+
+void lib2_func(Foo *) {
+  // Force definition into lib2.o debug-info.
+  Foo{}.foo();
+}
diff --git a/lldb/test/API/lang/cpp/function-call-from-object-file/main.cpp b/lldb/test/API/lang/cpp/function-call-from-object-file/main.cpp
new file mode 100644
index 0000000000000..61ca798daf1df
--- /dev/null
+++ b/lldb/test/API/lang/cpp/function-call-from-object-file/main.cpp
@@ -0,0 +1,10 @@
+struct Foo;
+
+extern void lib1_func(Foo *);
+extern void lib2_func(Foo *);
+
+int main() {
+  lib1_func(nullptr);
+  lib2_func(nullptr);
+  return 0;
+}

>From 76dc812fc92648653765d2cc22e444f8ae96ad65 Mon Sep 17 00:00:00 2001
From: Hendrik_Klug <43926224+Jimmy2027 at users.noreply.github.com>
Date: Wed, 1 Oct 2025 09:40:47 +0200
Subject: [PATCH 20/48] [mlir][transform] Add PromoteTensorOp (#158318)

Transform op to request a tensor value to live in a specific memory
space after bufferization

Co-authored-by: Nicolas Vasilache <Nico.Vasilache at amd.com>
Co-authored-by: Alex Zinenko <ftynse at gmail.com>
---
 .../Linalg/TransformOps/LinalgTransformOps.td |  49 +++++++-
 .../TransformOps/LinalgTransformOps.cpp       | 116 ++++++++++++++----
 .../mlir/dialects/transform/structured.py     |   6 -
 .../Transform/test-promote-tensors.mlir       | 104 ++++++++++++++++
 4 files changed, 239 insertions(+), 36 deletions(-)
 create mode 100644 mlir/test/Dialect/Transform/test-promote-tensors.mlir

diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 8f3232f01544f..0d6ebc087e2f3 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -17,6 +17,7 @@ include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.td"
 include "mlir/Dialect/Transform/IR/TransformTypes.td"
 include "mlir/Dialect/SCF/IR/DeviceMappingInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/IR/OpBase.td"
 include "mlir/IR/RegionKindInterface.td"
 
@@ -236,11 +237,51 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
                       Transform_AnyOpType:$new_ops);
   let assemblyFormat = "$target attr-dict `:` type($target)";
   let hasVerifier = 1;
+}
 
-  let builders = [
-    OpBuilder<(ins "Value":$target, "Attribute":$memorySpace)>,
-    OpBuilder<(ins "Value":$target, "int64_t":$memorySpace)>
-  ];
+//===----------------------------------------------------------------------===//
+// PromoteTensorOp
+//===----------------------------------------------------------------------===//
+
+def PromoteTensorOp : Op<Transform_Dialect, "structured.promote_tensor",
+                         [DeclareOpInterfaceMethods<TransformOpInterface>,
+                          DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
+                          SameOperandsAndResultType]> {
+  let summary = "Request a tensor value to live in a specific memory space "
+                "after bufferization";
+  let description = [{
+    Requests that a tensor value lives in a specific memory space for its
+    lifetime. This is achieved by allocating a new tensor in the desired
+    memory space with `bufferization.alloc_tensor` and optionally materializing
+    the source value into that allocation with
+    `bufferization.materialize_in_destination`. All uses of the original value
+    are then redirected to the promoted value.
+
+    The generated code for promoting tensor value %0 resembles the following:
+
+      %1 = bufferization.alloc_tensor(<dynamic dims of %0>)
+           { memory_space = memory_space }
+      // Note: the materialization is omitted if %0 is never read and is only
+      // written into (i.e., it behaves as a result tensor).
+      %2 = bufferization.materialize_in_destination %0 in %1
+      // ...
+      <all users of %0 now use %2 instead>
+
+    Deallocation is not handled by this transform.
+
+    Return modes:
+    - Produces a silenceable failure if the given handle does not point to
+      tensor-typed values.
+    - Succeeds otherwise and returns a handle to the promoted value(s), i.e.,
+      the result of materialization if present and the allocation otherwise.
+  }];
+
+  let arguments = (ins TransformValueHandleTypeInterface:$tensor,
+      OptionalAttr<AnyAttr>:$memory_space);
+  let results = (outs TransformValueHandleTypeInterface:$promoted);
+
+  let assemblyFormat =
+      "(`to` $memory_space^)? $tensor attr-dict `:` type($tensor)";
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 3f0b0bacd9756..dd9b4c2490ef4 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -42,6 +42,7 @@
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/DebugLog.h"
 #include "llvm/Support/LogicalResult.h"
@@ -273,32 +274,6 @@ void transform::ApplyFoldPackUnpackIntoEmptyPatternsOp::populatePatterns(
 // BufferizeToAllocationOp
 //===----------------------------------------------------------------------===//
 
-void transform::BufferizeToAllocationOp::build(OpBuilder &b,
-                                               OperationState &result,
-                                               Value target,
-                                               Attribute memorySpace) {
-  SmallVector<Type> resultTypes;
-  resultTypes.push_back(b.getType<transform::AnyValueType>());
-  resultTypes.push_back(b.getType<transform::AnyOpType>());
-  return build(b, result,
-               /*resultTypes=*/resultTypes,
-               /*target=*/target,
-               /*memory_space=*/memorySpace);
-}
-
-void transform::BufferizeToAllocationOp::build(OpBuilder &b,
-                                               OperationState &result,
-                                               Value target,
-                                               int64_t memorySpace) {
-  SmallVector<Type> resultTypes;
-  resultTypes.push_back(b.getType<transform::AnyValueType>());
-  resultTypes.push_back(b.getType<transform::AnyOpType>());
-  return build(b, result,
-               /*resultTypes=*/resultTypes,
-               /*target=*/target,
-               /*memory_space=*/b.getI64IntegerAttr(memorySpace));
-}
-
 namespace {
 class NewOpsListener : public RewriterBase::ForwardingListener {
 public:
@@ -408,6 +383,95 @@ LogicalResult transform::BufferizeToAllocationOp::verify() {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// PromoteTensorOp
+//===----------------------------------------------------------------------===//
+
+/// Return true if the operand may be read from by its owner. This is currently
+/// very conservative and only looks inside linalg operations to prevent
+/// unintentional data loss.
+static bool mayBeRead(OpOperand &operand) {
+  auto linalgOp = dyn_cast<linalg::LinalgOp>(operand.getOwner());
+
+  // Be conservative about ops we cannot analyze deeper.
+  if (!linalgOp)
+    return true;
+
+  // Look inside linalg ops.
+  Value blockArgument = linalgOp.getMatchingBlockArgument(&operand);
+  return !blockArgument.use_empty();
+}
+
+/// Return true if the value may be read through any of its uses.
+static bool mayBeRead(Value value) {
+  // If the value has a reference semantics, it
+  // may be read through any alias...
+  if (!isa<TensorType, FloatType, IntegerType>(value.getType()))
+    return true;
+  return llvm::any_of(value.getUses(),
+                      static_cast<bool (&)(OpOperand &)>(mayBeRead));
+}
+
+DiagnosedSilenceableFailure
+transform::PromoteTensorOp::apply(transform::TransformRewriter &rewriter,
+                                  transform::TransformResults &results,
+                                  transform::TransformState &state) {
+  SmallVector<Value> promoted;
+  for (Value tensor : state.getPayloadValues(getTensor())) {
+    auto type = dyn_cast<RankedTensorType>(tensor.getType());
+    if (!type) {
+      return emitSilenceableError() << "non-tensor type: " << tensor;
+    }
+
+    Operation *definingOp = tensor.getDefiningOp();
+    if (definingOp)
+      rewriter.setInsertionPointAfter(definingOp);
+    else
+      rewriter.setInsertionPointToStart(cast<BlockArgument>(tensor).getOwner());
+
+    // Check this before we emit operations using this value.
+    bool needsMaterialization = mayBeRead(tensor);
+
+    SmallVector<Value> dynamicDims;
+    llvm::SmallPtrSet<Operation *, 4> preservedOps;
+    for (auto [pos, dim] : llvm::enumerate(type.getShape())) {
+      if (!ShapedType::isDynamic(dim))
+        continue;
+      Value cst = rewriter.create<arith::ConstantIndexOp>(tensor.getLoc(), pos);
+      auto dimOp = rewriter.create<tensor::DimOp>(tensor.getLoc(), tensor, cst);
+      preservedOps.insert(dimOp);
+      dynamicDims.push_back(dimOp);
+    }
+    auto allocation = rewriter.create<bufferization::AllocTensorOp>(
+        tensor.getLoc(), type, dynamicDims);
+    // Set memory space if provided.
+    if (getMemorySpaceAttr())
+      allocation.setMemorySpaceAttr(getMemorySpaceAttr());
+    Value allocated = allocation;
+
+    // Only insert a materialization (typically bufferizes to a copy) when the
+    // value may be read from.
+    if (needsMaterialization) {
+      auto copy = rewriter.create<bufferization::MaterializeInDestinationOp>(
+          tensor.getLoc(), tensor, allocated);
+      preservedOps.insert(copy);
+      promoted.push_back(copy.getResult());
+    } else {
+      promoted.push_back(allocated);
+    }
+    rewriter.replaceAllUsesExcept(tensor, promoted.back(), preservedOps);
+  }
+  results.setValues(cast<OpResult>(getPromoted()), promoted);
+  return DiagnosedSilenceableFailure::success();
+}
+
+void transform::PromoteTensorOp::getEffects(
+    SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
+  transform::onlyReadsHandle(getTensorMutable(), effects);
+  transform::producesHandle(getOperation()->getOpResults(), effects);
+  transform::modifiesPayload(effects);
+}
+
 //===----------------------------------------------------------------------===//
 // DecomposeOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/python/mlir/dialects/transform/structured.py b/mlir/python/mlir/dialects/transform/structured.py
index bf40cc532065d..e3bacb5777d9f 100644
--- a/mlir/python/mlir/dialects/transform/structured.py
+++ b/mlir/python/mlir/dialects/transform/structured.py
@@ -44,18 +44,12 @@ def __init__(
         loc=None,
         ip=None,
     ):
-        # No other types are allowed, so hard-code those here.
-        allocated_buffer_type = transform.AnyValueType.get()
-        new_ops_type = transform.AnyOpType.get()
-
         if isinstance(memory_space, int):
             memory_space = str(memory_space)
         if isinstance(memory_space, str):
             memory_space = Attribute.parse(memory_space)
 
         super().__init__(
-            allocated_buffer_type,
-            new_ops_type,
             target,
             memory_space=memory_space,
             memcpy_op=memcpy_op,
diff --git a/mlir/test/Dialect/Transform/test-promote-tensors.mlir b/mlir/test/Dialect/Transform/test-promote-tensors.mlir
new file mode 100644
index 0000000000000..bc9a05af64156
--- /dev/null
+++ b/mlir/test/Dialect/Transform/test-promote-tensors.mlir
@@ -0,0 +1,104 @@
+// RUN: mlir-opt %s --transform-interpreter --split-input-file | FileCheck %s
+
+// CHECK-LABEL: @promote_in0
+// CHECK-SAME:  (%[[ARG0:.+]]: tensor<?x42xf32>, %{{.*}}, %{{.*}})
+// CHECK:  %[[C0:.+]] = arith.constant 0
+// CHECK:  %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]]
+// CHECK:  %[[ALLOC:.+]] = bufferization.alloc_tensor(%[[DIM]]) {memory_space = 1 : i64}
+// CHECK:  %[[MAT:.+]] = bufferization.materialize_in_destination %[[ARG0]] in %[[ALLOC]]
+// CHECK:  linalg.matmul ins(%[[MAT]], %{{.*}}
+func.func @promote_in0(%arg0: tensor<?x42xf32>, %arg1: tensor<42x?xf32>, %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
+    %0 = linalg.matmul ins(%arg0, %arg1: tensor<?x42xf32>, tensor<42x?xf32>)
+                       outs(%arg2: tensor<?x?xf32>) -> tensor<?x?xf32>
+    return %0 : tensor<?x?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+    transform.named_sequence @__transform_main(%root: !transform.any_op) {
+        %mm = transform.structured.match ops{["linalg.matmul"]} in %root
+            : (!transform.any_op) -> !transform.any_op
+        %op0 = transform.get_operand %mm[0]
+            : (!transform.any_op) -> !transform.any_value
+        transform.structured.promote_tensor to 1 %op0 : !transform.any_value
+        transform.yield
+    }
+}
+
+// -----
+
+// CHECK-LABEL: @promote_out
+// CHECK-SAME: (%{{.*}}: tensor<?x42xf32>, %{{.*}}: tensor<?x42xf32>, %[[ARG2:.+]]: tensor<?x?xf32>)
+func.func @promote_out(%arg0: tensor<?x42xf32>, %arg1: tensor<?x42xf32>, %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
+    // CHECK:  %[[C0:.+]] = arith.constant 0
+    // CHECK:  %[[DIM0:.+]] = tensor.dim %[[ARG2]], %[[C0]]
+    // CHECK:  %[[C1:.+]] = arith.constant 1
+    // CHECK:  %[[DIM1:.+]] = tensor.dim %[[ARG2]], %[[C1]]
+    // CHECK:  %[[ALLOC:.+]] = bufferization.alloc_tensor(%[[DIM0]], %[[DIM1]]) {memory_space = 1 : i64}
+    // CHECK-NOT: materialize_in_destination
+    // CHECK:  linalg.add {{.*}} outs(%[[ALLOC]]
+    %0 = linalg.add ins(%arg0, %arg1 : tensor<?x42xf32>, tensor<?x42xf32>)
+                    outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
+    return %0 : tensor<?x?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+    transform.named_sequence @__transform_main(%root: !transform.any_op) {
+        %la = transform.structured.match ops{["linalg.add"]} in %root
+            : (!transform.any_op) -> !transform.any_op
+        %init = transform.get_operand %la[2]
+                : (!transform.any_op) -> !transform.any_value
+        transform.structured.promote_tensor to 1 %init : !transform.any_value
+
+        transform.yield
+    }
+}
+
+// -----
+
+// CHECK-LABEL: @promote_in0_out_bufferize
+// CHECK-SAME: (%[[ARG0:.+]]: tensor<?x42xf32>, %{{.*}}: tensor<42x?xf32>, %[[ARG2:.+]]: tensor<?x?xf32>)
+func.func @promote_in0_out_bufferize(%arg0: tensor<?x42xf32>, %arg1: tensor<42x?xf32>, %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
+    // CHECK:  %[[IN1:.+]] = bufferization.to_buffer %arg1 : tensor<42x?xf32> to memref<42x?xf32, strided<[?, ?], offset: ?>>
+    // CHECK:  %[[IN0:.+]] = bufferization.to_buffer %arg0 : tensor<?x42xf32> to memref<?x42xf32, strided<[?, ?], offset: ?>>
+    // CHECK:  %{{.+}} = bufferization.to_buffer %arg0 : tensor<?x42xf32> to memref<?x42xf32, strided<[?, ?], offset: ?>>
+    // CHECK:  %{{.+}} = bufferization.to_buffer %arg2 : tensor<?x?xf32> to memref<?x?xf32, strided<[?, ?], offset: ?>>
+    // CHECK:  %{{.+}} = bufferization.to_buffer %arg2 : tensor<?x?xf32> to memref<?x?xf32, strided<[?, ?], offset: ?>>
+    // CHECK:  %[[C0:.+]] = arith.constant 0 : index
+    // CHECK:  %{{.+}} = memref.dim %{{.+}}, %[[C0]] : memref<?x?xf32, strided<[?, ?], offset: ?>>
+    // CHECK:  %[[C1:.+]] = arith.constant 1 : index
+    // CHECK:  %{{.+}} = memref.dim %{{.+}}, %[[C1]] : memref<?x?xf32, strided<[?, ?], offset: ?>>
+    // CHECK:  %[[ALLOC_OUT:.+]] = memref.alloc(%{{.+}}, %{{.+}}) {alignment = 64 : i64} : memref<?x?xf32, 1>
+    // CHECK:  %{{.+}} = arith.constant 0 : index
+    // CHECK:  %{{.+}} = memref.dim %{{.+}}, %{{.+}} : memref<?x42xf32, strided<[?, ?], offset: ?>>
+    // CHECK:  %[[ALLOC_IN:.+]] = memref.alloc(%{{.+}}) {alignment = 64 : i64} : memref<?x42xf32, 1>
+    // CHECK:  memref.copy %[[IN0]], %[[ALLOC_IN]] : memref<?x42xf32, strided<[?, ?], offset: ?>> to memref<?x42xf32, 1>
+    // CHECK: linalg.add ins(%[[ALLOC_IN]], %[[IN1]] : memref<?x42xf32, 1>, memref<42x?xf32, strided<[?, ?], offset: ?>>) outs(%[[ALLOC_OUT]] : memref<?x?xf32, 1>)
+    %0 = linalg.add ins(%arg0, %arg1: tensor<?x42xf32>, tensor<42x?xf32>)
+                    outs(%arg2: tensor<?x?xf32>) -> tensor<?x?xf32>
+    return %0 : tensor<?x?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+    transform.named_sequence @__transform_main(%root: !transform.any_op) {
+        %la = transform.structured.match ops{["linalg.add"]} in %root
+            : (!transform.any_op) -> !transform.any_op
+        %op0 = transform.get_operand %la[0]
+            : (!transform.any_op) -> !transform.any_value
+        transform.structured.promote_tensor to 1 %op0 : !transform.any_value
+
+        %init = transform.get_operand %la[2]
+                : (!transform.any_op) -> !transform.any_value
+        transform.structured.promote_tensor to 1 %init : !transform.any_value
+
+        %func = transform.structured.match ops{["func.func"]} in %root
+                : (!transform.any_op) -> !transform.any_op
+
+        %bufferized = transform.bufferization.one_shot_bufferize %func
+            : (!transform.any_op) -> !transform.any_op
+
+        transform.yield
+    }
+}
+
+
+

>From 7e19fea8d7678471febd2c5eb71b893936658e70 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 1 Oct 2025 09:33:51 +0200
Subject: [PATCH 21/48] [MemorySanitizer] Generate check lines for some vararg
 tests (NFC)

Use UTC_ARGS: --disable to skip the tests with many arguments.
---
 .../LoongArch/vararg-loongarch64.ll           |  79 ++++++--
 .../MemorySanitizer/Mips/vararg-mips64.ll     |  87 +++++++--
 .../MemorySanitizer/Mips/vararg-mips64el.ll   |  86 +++++++--
 .../MemorySanitizer/PowerPC/vararg-ppc64.ll   | 174 +++++++++++++----
 .../MemorySanitizer/PowerPC/vararg-ppc64le.ll | 175 ++++++++++++++----
 5 files changed, 484 insertions(+), 117 deletions(-)

diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll
index e6d3a4b2994ad..4d4fc1bdd7bde 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
@@ -9,12 +10,36 @@ declare void @llvm.va_start(ptr) #2
 declare void @llvm.va_end(ptr) #2
 declare void @llvm.lifetime.end.p0(ptr nocapture) #1
 define i32 @foo(i32 %guard, ...) {
-; CHECK-LABEL: @foo
-; CHECK:    [[TMP1:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls
-; CHECK:    [[TMP3:%.*]] = alloca {{.*}} [[TMP1]]
-; CHECK:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 [[TMP1]], i1 false)
-; CHECK:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800)
-; CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP3]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP4]], i1 false)
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: i32 [[GUARD:%.*]], ...) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false)
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[VL:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP6]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr [[VL]])
+; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VL]])
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr
+; CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64
+; CHECK-NEXT:    [[TMP14:%.*]] = xor i64 [[TMP13]], 87960930222080
+; CHECK-NEXT:    [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false)
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VL]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr [[VL]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 0
 ;
   %vl = alloca ptr, align 8
   call void @llvm.lifetime.start.p0(ptr %vl)
@@ -27,11 +52,22 @@ define i32 @foo(i32 %guard, ...) {
 ;; Save the incoming shadow value from the arguments in the __msan_va_arg_tls
 ;; array.
 define i32 @bar() {
-; CHECK-LABEL: @bar
-; CHECK:    store i32 0, ptr @__msan_va_arg_tls, align 8
-; CHECK:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
-; CHECK:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
-; CHECK:    store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
+; CHECK-LABEL: define i32 @bar() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_va_arg_tls, align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
   ret i32 %1
@@ -40,15 +76,28 @@ define i32 @bar() {
 ;; Check multiple fixed arguments.
 declare i32 @foo2(i32 %g1, i32 %g2, ...)
 define i32 @bar2() {
-; CHECK-LABEL: @bar2
-; CHECK:    store i64 0, ptr @__msan_va_arg_tls, align 8
-; CHECK:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
-; CHECK:    store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
+; CHECK-LABEL: define i32 @bar2() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr @__msan_va_arg_tls, align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00)
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00)
   ret i32 %1
 }
 
+; UTC_ARGS: --disable
+
 ;; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are
 ;; passed to a variadic function.
 declare i64 @sum(i64 %n, ...)
diff --git a/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64.ll b/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64.ll
index 69a74a37a1f04..9f3f10e51b272 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64.ll
@@ -1,9 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s
 
 target datalayout = "E-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128"
 target triple = "mips64--linux"
 
 define i32 @foo(i32 %guard, ...) {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: i32 [[GUARD:%.*]], ...) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false)
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[VL:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 549755813888
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP6]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr [[VL]])
+; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 549755813888
+; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VL]])
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr
+; CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64
+; CHECK-NEXT:    [[TMP14:%.*]] = xor i64 [[TMP13]], 549755813888
+; CHECK-NEXT:    [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false)
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VL]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr [[VL]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 0
+;
   %vl = alloca ptr, align 8
   call void @llvm.lifetime.start.p0(ptr %vl)
   call void @llvm.va_start(ptr %vl)
@@ -12,23 +44,29 @@ define i32 @foo(i32 %guard, ...) {
   ret i32 0
 }
 
-; First, check allocation of the save area.
-
-; CHECK-LABEL: @foo
-; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls
-; CHECK: [[C:%.*]] = alloca {{.*}} [[A]]
-
-; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[A]], i1 false)
-
-; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[A]], i64 800)
-; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false)
-
 declare void @llvm.lifetime.start.p0(ptr nocapture) #1
 declare void @llvm.va_start(ptr) #2
 declare void @llvm.va_end(ptr) #2
 declare void @llvm.lifetime.end.p0(ptr nocapture) #1
 
 define i32 @bar() {
+; CHECK-LABEL: define i32 @bar() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
   ret i32 %1
 }
@@ -36,23 +74,32 @@ define i32 @bar() {
 ; Save the incoming shadow value from the arguments in the __msan_va_arg_tls
 ; array.  The first argument is stored at position 4, since it's right
 ; justified.
-; CHECK-LABEL: @bar
-; CHECK: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8
-; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
-; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
-; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
 
 ; Check multiple fixed arguments.
 declare i32 @foo2(i32 %g1, i32 %g2, ...)
 define i32 @bar2() {
+; CHECK-LABEL: define i32 @bar2() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr @__msan_va_arg_tls, align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00)
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00)
   ret i32 %1
 }
 
-; CHECK-LABEL: @bar2
-; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8
-; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
-; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; UTC_ARGS: --disable
 
 ; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are
 ; passed to a variadic function.
diff --git a/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64el.ll b/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64el.ll
index b19da8e9ff14b..41fb975dcf285 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64el.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64el.ll
@@ -1,9 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s
 
 target datalayout = "e-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128"
 target triple = "mips64el--linux"
 
 define i32 @foo(i32 %guard, ...) {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: i32 [[GUARD:%.*]], ...) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false)
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[VL:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 549755813888
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP6]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr [[VL]])
+; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 549755813888
+; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VL]])
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr
+; CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64
+; CHECK-NEXT:    [[TMP14:%.*]] = xor i64 [[TMP13]], 549755813888
+; CHECK-NEXT:    [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false)
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VL]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr [[VL]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 0
+;
   %vl = alloca ptr, align 8
   call void @llvm.lifetime.start.p0(ptr %vl)
   call void @llvm.va_start(ptr %vl)
@@ -12,46 +44,60 @@ define i32 @foo(i32 %guard, ...) {
   ret i32 0
 }
 
-; First, check allocation of the save area.
-
-; CHECK-LABEL: @foo
-; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls
-; CHECK: [[C:%.*]] = alloca {{.*}} [[A]]
-
-; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[A]], i1 false)
-
-; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[A]], i64 800)
-; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false)
-
 declare void @llvm.lifetime.start.p0(ptr nocapture) #1
 declare void @llvm.va_start(ptr) #2
 declare void @llvm.va_end(ptr) #2
 declare void @llvm.lifetime.end.p0(ptr nocapture) #1
 
 define i32 @bar() {
+; CHECK-LABEL: define i32 @bar() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_va_arg_tls, align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
   ret i32 %1
 }
 
 ; Save the incoming shadow value from the arguments in the __msan_va_arg_tls
 ; array.
-; CHECK-LABEL: @bar
-; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8
-; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
-; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
-; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
 
 ; Check multiple fixed arguments.
 declare i32 @foo2(i32 %g1, i32 %g2, ...)
 define i32 @bar2() {
+; CHECK-LABEL: define i32 @bar2() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr @__msan_va_arg_tls, align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00)
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00)
   ret i32 %1
 }
 
-; CHECK-LABEL: @bar2
-; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8
-; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
-; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
+; UTC_ARGS: --disable
 
 ; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are
 ; passed to a variadic function.
diff --git a/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll b/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll
index 9351067969050..19b07e16fb46f 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll
@@ -1,9 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s
 
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64--linux"
 
 define i32 @foo(i32 %guard, ...) {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: i32 [[GUARD:%.*]], ...) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false)
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[VL:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], -246290604621825
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 17592186044416
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 8796093022208
+; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP8]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr [[VL]])
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = and i64 [[TMP9]], -246290604621825
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 17592186044416
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], 8796093022208
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VL]])
+; CHECK-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr
+; CHECK-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
+; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
+; CHECK-NEXT:    [[TMP18:%.*]] = and i64 [[TMP17]], -246290604621825
+; CHECK-NEXT:    [[TMP19:%.*]] = xor i64 [[TMP18]], 17592186044416
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[TMP19]], 8796093022208
+; CHECK-NEXT:    [[TMP21:%.*]] = inttoptr i64 [[TMP20]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false)
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VL]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr [[VL]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 0
+;
   %vl = alloca ptr, align 8
   call void @llvm.lifetime.start.p0(ptr %vl)
   call void @llvm.va_start(ptr %vl)
@@ -12,23 +50,29 @@ define i32 @foo(i32 %guard, ...) {
   ret i32 0
 }
 
-; First, check allocation of the save area.
-
-; CHECK-LABEL: @foo
-; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls
-; CHECK: [[C:%.*]] = alloca {{.*}} [[A]]
-
-; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[A]], i1 false)
-
-; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[A]], i64 800)
-; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false)
-
 declare void @llvm.lifetime.start.p0(ptr nocapture) #1
 declare void @llvm.va_start(ptr) #2
 declare void @llvm.va_end(ptr) #2
 declare void @llvm.lifetime.end.p0(ptr nocapture) #1
 
 define i32 @bar() {
+; CHECK-LABEL: define i32 @bar() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
   ret i32 %1
 }
@@ -36,14 +80,22 @@ define i32 @bar() {
 ; Save the incoming shadow value from the arguments in the __msan_va_arg_tls
 ; array.  The first argument is stored at position 4, since it's right
 ; justified.
-; CHECK-LABEL: @bar
-; CHECK: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8
-; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
-; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
-; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
 
 ; Check vector argument.
 define i32 @bar2() {
+; CHECK-LABEL: define i32 @bar2() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>)
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>)
   ret i32 %1
 }
@@ -51,50 +103,110 @@ define i32 @bar2() {
 ; The vector is at offset 16 of parameter save area, but __msan_va_arg_tls
 ; corresponds to offset 8+ of parameter save area - so the offset from
 ; __msan_va_arg_tls is actually misaligned.
-; CHECK-LABEL: @bar2
-; CHECK: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
-; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
 
 ; Check i64 array.
 define i32 @bar4() {
+; CHECK-LABEL: define i32 @bar4() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store [2 x i64] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8
+; CHECK-NEXT:    store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2])
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2])
   ret i32 %1
 }
 
-; CHECK-LABEL: @bar4
-; CHECK: store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8
-; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
 
 ; Check i128 array.
 define i32 @bar5() {
+; CHECK-LABEL: define i32 @bar5() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 40, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2])
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2])
   ret i32 %1
 }
 
-; CHECK-LABEL: @bar5
-; CHECK: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
-; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls
 
 ; Check 8-aligned byval.
 define i32 @bar6(ptr %arg) {
+; CHECK-LABEL: define i32 @bar6(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -246290604621825
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 17592186044416
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], 8796093022208
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARG]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = and i64 [[TMP7]], -246290604621825
+; CHECK-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 17592186044416
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[TMP9]], 8796093022208
+; CHECK-NEXT:    [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 [[TMP11]], i64 16, i1 false)
+; CHECK-NEXT:    store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 [[ARG]])
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP12]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 %arg)
   ret i32 %1
 }
 
-; CHECK-LABEL: @bar6
-; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 {{.*}}, i64 16, i1 false)
-; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
 
 ; Check 16-aligned byval.
 define i32 @bar7(ptr %arg) {
+; CHECK-LABEL: define i32 @bar7(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -246290604621825
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 17592186044416
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], 8796093022208
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARG]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = and i64 [[TMP7]], -246290604621825
+; CHECK-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 17592186044416
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[TMP9]], 8796093022208
+; CHECK-NEXT:    [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 [[TMP11]], i64 32, i1 false)
+; CHECK-NEXT:    store i64 40, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = call i32 (i32, ...) @foo(i32 0, ptr byval([4 x i64]) align 16 [[ARG]])
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP12]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([4 x i64]) align 16 %arg)
   ret i32 %1
 }
 
-; CHECK-LABEL: @bar7
-; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 {{.*}}, i64 32, i1 false)
-; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls
 
+; UTC_ARGS: --disable
 
 ; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are
 ; passed to a variadic function.
diff --git a/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll b/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll
index 4151f3b223b3a..1fe63850860e8 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll
@@ -1,9 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-n32:64"
 target triple = "powerpc64le--linux"
 
 define i32 @foo(i32 %guard, ...) {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: i32 [[GUARD:%.*]], ...) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false)
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[VL:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], -246290604621825
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 17592186044416
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 8796093022208
+; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP8]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr [[VL]])
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = and i64 [[TMP9]], -246290604621825
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 17592186044416
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], 8796093022208
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VL]])
+; CHECK-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[VL]] to i64
+; CHECK-NEXT:    [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr
+; CHECK-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
+; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
+; CHECK-NEXT:    [[TMP18:%.*]] = and i64 [[TMP17]], -246290604621825
+; CHECK-NEXT:    [[TMP19:%.*]] = xor i64 [[TMP18]], 17592186044416
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[TMP19]], 8796093022208
+; CHECK-NEXT:    [[TMP21:%.*]] = inttoptr i64 [[TMP20]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false)
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr [[VL]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr [[VL]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 0
+;
   %vl = alloca ptr, align 8
   call void @llvm.lifetime.start.p0(ptr %vl)
   call void @llvm.va_start(ptr %vl)
@@ -12,37 +50,51 @@ define i32 @foo(i32 %guard, ...) {
   ret i32 0
 }
 
-; First, check allocation of the save area.
-
-; CHECK-LABEL: @foo
-; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls
-; CHECK: [[C:%.*]] = alloca {{.*}} [[A]]
-
-; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[A]], i1 false)
-
-; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[A]], i64 800)
-; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false)
-
 declare void @llvm.lifetime.start.p0(ptr nocapture) #1
 declare void @llvm.va_start(ptr) #2
 declare void @llvm.va_end(ptr) #2
 declare void @llvm.lifetime.end.p0(ptr nocapture) #1
 
 define i32 @bar() {
+; CHECK-LABEL: define i32 @bar() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_va_arg_tls, align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
   ret i32 %1
 }
 
 ; Save the incoming shadow value from the arguments in the __msan_va_arg_tls
 ; array.
-; CHECK-LABEL: @bar
-; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8
-; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
-; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
-; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
 
 ; Check vector argument.
 define i32 @bar2() {
+; CHECK-LABEL: define i32 @bar2() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>)
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>)
   ret i32 %1
 }
@@ -50,49 +102,110 @@ define i32 @bar2() {
 ; The vector is at offset 16 of parameter save area, but __msan_va_arg_tls
 ; corresponds to offset 8+ of parameter save area - so the offset from
 ; __msan_va_arg_tls is actually misaligned.
-; CHECK-LABEL: @bar2
-; CHECK: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
-; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
 
 ; Check i64 array.
 define i32 @bar4() {
+; CHECK-LABEL: define i32 @bar4() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store [2 x i64] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8
+; CHECK-NEXT:    store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2])
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2])
   ret i32 %1
 }
 
-; CHECK-LABEL: @bar4
-; CHECK: store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8
-; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
 
 ; Check i128 array.
 define i32 @bar5() {
+; CHECK-LABEL: define i32 @bar5() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 40, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2])
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2])
   ret i32 %1
 }
 
-; CHECK-LABEL: @bar5
-; CHECK: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
-; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls
 
 ; Check 8-aligned byval.
 define i32 @bar6(ptr %arg) {
+; CHECK-LABEL: define i32 @bar6(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -246290604621825
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 17592186044416
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], 8796093022208
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARG]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = and i64 [[TMP7]], -246290604621825
+; CHECK-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 17592186044416
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[TMP9]], 8796093022208
+; CHECK-NEXT:    [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 [[TMP11]], i64 16, i1 false)
+; CHECK-NEXT:    store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 [[ARG]])
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP12]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 %arg)
   ret i32 %1
 }
 
-; CHECK-LABEL: @bar6
-; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 {{.*}}, i64 16, i1 false)
-; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
 
 ; Check 16-aligned byval.
 define i32 @bar7(ptr %arg) {
+; CHECK-LABEL: define i32 @bar7(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -246290604621825
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 17592186044416
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], 8796093022208
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[ARG]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = and i64 [[TMP7]], -246290604621825
+; CHECK-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 17592186044416
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[TMP9]], 8796093022208
+; CHECK-NEXT:    [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 [[TMP11]], i64 32, i1 false)
+; CHECK-NEXT:    store i64 40, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = call i32 (i32, ...) @foo(i32 0, ptr byval([4 x i64]) align 16 [[ARG]])
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP12]]
+;
   %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([4 x i64]) align 16 %arg)
   ret i32 %1
 }
 
-; CHECK-LABEL: @bar7
-; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 {{.*}}, i64 32, i1 false)
-; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; UTC_ARGS: --disable
 
 ; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are
 ; passed to a variadic function.

>From 9672bf80e23c5305c15ebe866eb86851728a8500 Mon Sep 17 00:00:00 2001
From: Abid Qadeer <haqadeer at amd.com>
Date: Wed, 1 Oct 2025 09:05:37 +0100
Subject: [PATCH 22/48] [flang][debug] Change type*N to type(kind=N). (#161432)

This naming change was discussed in https://github.com/llvm/llvm-project/pull/161361.
---
 .../lib/Optimizer/Transforms/DebugTypeGenerator.cpp  |  7 ++++---
 flang/test/Integration/debug-complex-1.f90           |  4 ++--
 flang/test/Integration/debug-local-var-2.f90         |  6 +++---
 flang/test/Transforms/debug-complex-1.fir            |  4 ++--
 flang/test/Transforms/debug-derived-type-1.fir       |  6 +++---
 flang/test/Transforms/debug-fn-info.fir              |  6 +++---
 flang/test/Transforms/debug-local-var.fir            |  6 +++---
 flang/test/Transforms/debug-ref-type.fir             |  2 +-
 flang/test/Transforms/debug-tuple-type.fir           |  2 +-
 flang/test/Transforms/debug-vector-type.fir          | 12 ++++++------
 10 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
index a7e47239036ba..00fdb5a4516bd 100644
--- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
+++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
@@ -682,10 +682,11 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertPointerLikeType(
 static mlir::StringAttr getBasicTypeName(mlir::MLIRContext *context,
                                          llvm::StringRef baseName,
                                          unsigned bitSize) {
-  std::string name(baseName.str());
+  std::ostringstream oss;
+  oss << baseName.str();
   if (bitSize != 32)
-    name += "*" + std::to_string(bitSize / 8);
-  return mlir::StringAttr::get(context, name);
+    oss << "(kind=" << (bitSize / 8) << ")";
+  return mlir::StringAttr::get(context, oss.str());
 }
 
 mlir::LLVM::DITypeAttr
diff --git a/flang/test/Integration/debug-complex-1.f90 b/flang/test/Integration/debug-complex-1.f90
index 48ea0295eb250..1d70140a202d7 100644
--- a/flang/test/Integration/debug-complex-1.f90
+++ b/flang/test/Integration/debug-complex-1.f90
@@ -17,8 +17,8 @@ function fn1(a, b) result (c)
 end program
 
 ! CHECK-DAG: ![[C4:.*]] = !DIBasicType(name: "complex", size: 64, encoding: DW_ATE_complex_float)
-! CHECK-DAG: ![[C8:.*]] = !DIBasicType(name: "complex*8", size: 128, encoding: DW_ATE_complex_float)
-! CHECK-DAG: ![[C16:.*]] = !DIBasicType(name: "complex*16", size: 256, encoding: DW_ATE_complex_float)
+! CHECK-DAG: ![[C8:.*]] = !DIBasicType(name: "complex(kind=8)", size: 128, encoding: DW_ATE_complex_float)
+! CHECK-DAG: ![[C16:.*]] = !DIBasicType(name: "complex(kind=16)", size: 256, encoding: DW_ATE_complex_float)
 ! CHECK-DAG: !DILocalVariable(name: "c4"{{.*}}type: ![[C4]])
 ! CHECK-DAG: !DILocalVariable(name: "c8"{{.*}}type: ![[C8]])
 ! CHECK-DAG: !DILocalVariable(name: "r"{{.*}}type: ![[C16]])
diff --git a/flang/test/Integration/debug-local-var-2.f90 b/flang/test/Integration/debug-local-var-2.f90
index 93659a56c7275..e95263e6841ad 100644
--- a/flang/test/Integration/debug-local-var-2.f90
+++ b/flang/test/Integration/debug-local-var-2.f90
@@ -40,11 +40,11 @@ program mn
 ! BOTH-DAG: ![[MAIN:.*]] = distinct !DISubprogram(name: "MN", {{.*}})
 
 ! BOTH-DAG: ![[TYI32:.*]] = !DIBasicType(name: "integer", size: 32, encoding: DW_ATE_signed)
-! BOTH-DAG: ![[TYI64:.*]] = !DIBasicType(name: "integer*8", size: 64, encoding: DW_ATE_signed)
-! BOTH-DAG: ![[TYL8:.*]]  = !DIBasicType(name: "logical*1", size: 8, encoding: DW_ATE_boolean)
+! BOTH-DAG: ![[TYI64:.*]] = !DIBasicType(name: "integer(kind=8)", size: 64, encoding: DW_ATE_signed)
+! BOTH-DAG: ![[TYL8:.*]]  = !DIBasicType(name: "logical(kind=1)", size: 8, encoding: DW_ATE_boolean)
 ! BOTH-DAG: ![[TYL32:.*]] = !DIBasicType(name: "logical", size: 32, encoding: DW_ATE_boolean)
 ! BOTH-DAG: ![[TYR32:.*]] = !DIBasicType(name: "real", size: 32, encoding: DW_ATE_float)
-! BOTH-DAG: ![[TYR64:.*]] = !DIBasicType(name: "real*8", size: 64, encoding: DW_ATE_float)
+! BOTH-DAG: ![[TYR64:.*]] = !DIBasicType(name: "real(kind=8)", size: 64, encoding: DW_ATE_float)
 
 ! BOTH-DAG: ![[I4]] = !DILocalVariable(name: "i4", scope: ![[MAIN]], file: !{{.*}}, line: [[@LINE+6]], type: ![[TYI32]])
 ! BOTH-DAG: ![[I8]] = !DILocalVariable(name: "i8", scope: ![[MAIN]], file: !{{.*}}, line: [[@LINE+6]], type: ![[TYI64]])
diff --git a/flang/test/Transforms/debug-complex-1.fir b/flang/test/Transforms/debug-complex-1.fir
index 7a288fec69be3..6e2c6c5bdb354 100644
--- a/flang/test/Transforms/debug-complex-1.fir
+++ b/flang/test/Transforms/debug-complex-1.fir
@@ -26,9 +26,9 @@ module {
 #loc3 = loc("./simple.f90":8:1)
 #loc4 = loc("./simple.f90":11:1)
 
-// CHECK-DAG: #[[CMPX8:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "complex*8", sizeInBits = 128, encoding = DW_ATE_complex_float>
+// CHECK-DAG: #[[CMPX8:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "complex(kind=8)", sizeInBits = 128, encoding = DW_ATE_complex_float>
 // CHECK-DAG: #[[CMPX4:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "complex", sizeInBits = 64, encoding = DW_ATE_complex_float>
-// CHECK-DAG: #[[CMPX16:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "complex*16", sizeInBits = 256, encoding = DW_ATE_complex_float>
+// CHECK-DAG: #[[CMPX16:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "complex(kind=16)", sizeInBits = 256, encoding = DW_ATE_complex_float>
 
 // CHECK-DAG: #[[TY1:.*]] = #llvm.di_subroutine_type<{{.*}}types = #[[CMPX8]], #[[CMPX4]]>
 // CHECK-DAG: #[[TY2:.*]] = #llvm.di_subroutine_type<{{.*}}types = #[[CMPX16]], #[[CMPX4]]>
diff --git a/flang/test/Transforms/debug-derived-type-1.fir b/flang/test/Transforms/debug-derived-type-1.fir
index 672b6cf2819d2..22832b67742c8 100644
--- a/flang/test/Transforms/debug-derived-type-1.fir
+++ b/flang/test/Transforms/debug-derived-type-1.fir
@@ -45,12 +45,12 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr<272>, d
 
 
 // CHECK-DAG: #[[INT_TY:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer", sizeInBits = 32, encoding = DW_ATE_signed>
-// CHECK-DAG: #[[INT8_TY:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer*8", sizeInBits = 64, encoding = DW_ATE_signed>
+// CHECK-DAG: #[[INT8_TY:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer(kind=8)", sizeInBits = 64, encoding = DW_ATE_signed>
 // CHECK-DAG: #[[REAL4_TY:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real", sizeInBits = 32, encoding = DW_ATE_float>
 // CHECK-DAG: #[[CMX8_TY:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "complex", sizeInBits = 64, encoding = DW_ATE_complex_float>
 // CHECK-DAG: #[[CMX_ARR:.*]] = #llvm.di_composite_type<tag = DW_TAG_array_type, baseType = #[[CMX8_TY:.*]]{{.*}}>
-// CHECK-DAG: #[[LOG_TY:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "logical*1", sizeInBits = 8, encoding = DW_ATE_boolean>
-// CHECK-DAG: #[[REAL8_TY:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real*8", sizeInBits = 64, encoding = DW_ATE_float>
+// CHECK-DAG: #[[LOG_TY:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "logical(kind=1)", sizeInBits = 8, encoding = DW_ATE_boolean>
+// CHECK-DAG: #[[REAL8_TY:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real(kind=8)", sizeInBits = 64, encoding = DW_ATE_float>
 // CHECK-DAG: #[[STR_TY:.*]] = #llvm.di_string_type
 // CHECK-DAG: #[[MOD:.*]] = #llvm.di_module<{{.*}}name = "m_employee"{{.*}}>
 // CHECK-DAG: #[[MOD1:.*]] = #llvm.di_module<{{.*}}name = "t1"{{.*}}>
diff --git a/flang/test/Transforms/debug-fn-info.fir b/flang/test/Transforms/debug-fn-info.fir
index d82cef1acc423..e42beb1f748f1 100644
--- a/flang/test/Transforms/debug-fn-info.fir
+++ b/flang/test/Transforms/debug-fn-info.fir
@@ -64,10 +64,10 @@ module {
 #loc4 = loc("test2.f90":53:22)
 
 
-// CHECK-DAG: #[[INT8:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer*8", sizeInBits = 64, encoding = DW_ATE_signed>
+// CHECK-DAG: #[[INT8:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer(kind=8)", sizeInBits = 64, encoding = DW_ATE_signed>
 // CHECK-DAG: #[[INT4:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer", sizeInBits = 32, encoding = DW_ATE_signed>
-// CHECK-DAG: #[[REAL8:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real*8", sizeInBits = 64, encoding = DW_ATE_float>
-// CHECK-DAG: #[[LOG1:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "logical*1", sizeInBits = 8, encoding = DW_ATE_boolean>
+// CHECK-DAG: #[[REAL8:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real(kind=8)", sizeInBits = 64, encoding = DW_ATE_float>
+// CHECK-DAG: #[[LOG1:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "logical(kind=1)", sizeInBits = 8, encoding = DW_ATE_boolean>
 // CHECK-DAG: #[[REAL4:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real", sizeInBits = 32, encoding = DW_ATE_float>
 // CHECK-DAG: #[[LOG4:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "logical", sizeInBits = 32, encoding = DW_ATE_boolean>
 // CHECK: #[[TY0:.*]] = #llvm.di_subroutine_type<callingConvention = DW_CC_program, types = #di_null_type>
diff --git a/flang/test/Transforms/debug-local-var.fir b/flang/test/Transforms/debug-local-var.fir
index 466f79c6ed879..d39017e6dd62a 100644
--- a/flang/test/Transforms/debug-local-var.fir
+++ b/flang/test/Transforms/debug-local-var.fir
@@ -71,10 +71,10 @@ module {
 #loc15 = loc("test.f90":21:24)
 #loc16 = loc("test.f90":22:5)
 
-// CHECK-DAG: #[[INT8:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer*8", sizeInBits = 64, encoding = DW_ATE_signed>
+// CHECK-DAG: #[[INT8:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer(kind=8)", sizeInBits = 64, encoding = DW_ATE_signed>
 // CHECK-DAG: #[[INT4:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer", sizeInBits = 32, encoding = DW_ATE_signed>
-// CHECK-DAG: #[[REAL8:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real*8", sizeInBits = 64, encoding = DW_ATE_float>
-// CHECK-DAG: #[[LOG1:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "logical*1", sizeInBits = 8, encoding = DW_ATE_boolean>
+// CHECK-DAG: #[[REAL8:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real(kind=8)", sizeInBits = 64, encoding = DW_ATE_float>
+// CHECK-DAG: #[[LOG1:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "logical(kind=1)", sizeInBits = 8, encoding = DW_ATE_boolean>
 // CHECK-DAG: #[[REAL4:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real", sizeInBits = 32, encoding = DW_ATE_float>
 // CHECK-DAG: #[[LOG4:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "logical", sizeInBits = 32, encoding = DW_ATE_boolean>
 // CHECK-DAG: #[[MAIN:.*]] = #llvm.di_subprogram<{{.*}}name = "mn"{{.*}}>
diff --git a/flang/test/Transforms/debug-ref-type.fir b/flang/test/Transforms/debug-ref-type.fir
index 2164a40c7c111..daffa293ba2e3 100644
--- a/flang/test/Transforms/debug-ref-type.fir
+++ b/flang/test/Transforms/debug-ref-type.fir
@@ -5,6 +5,6 @@ module {
 }
 #loc1 = loc("test.f90":5:1)
 
-// CHECK: #[[INT8_TY:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer*1", sizeInBits = 8, encoding = DW_ATE_signed>
+// CHECK: #[[INT8_TY:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer(kind=1)", sizeInBits = 8, encoding = DW_ATE_signed>
 // CHECK: #[[REF_TY:.*]] = #llvm.di_derived_type<tag = DW_TAG_pointer_type, name = "", baseType = #[[INT8_TY]]{{.*}}>
 // CHECK: #llvm.di_subroutine_type<{{.*}}types = #[[REF_TY]], #[[INT8_TY]]>
diff --git a/flang/test/Transforms/debug-tuple-type.fir b/flang/test/Transforms/debug-tuple-type.fir
index b865d492b6696..73a07333b3aef 100644
--- a/flang/test/Transforms/debug-tuple-type.fir
+++ b/flang/test/Transforms/debug-tuple-type.fir
@@ -5,7 +5,7 @@ module {
   func.func private @_FortranAioOutputDerivedType(!fir.ref<tuple<>>)
 }
 
-// CHECK: #[[F64:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real*8", sizeInBits = 64, encoding = DW_ATE_float>
+// CHECK: #[[F64:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real(kind=8)", sizeInBits = 64, encoding = DW_ATE_float>
 // CHECK: #[[CU:.*]] = #llvm.di_compile_unit<{{.*}}>
 // CHECK: #[[DTY1:.*]] = #llvm.di_derived_type<tag = DW_TAG_member, name = "", baseType = #[[F64]], sizeInBits = 64, alignInBits = {{.*}}>
 // CHECK: #[[DTY2:.*]] = #llvm.di_derived_type<tag = DW_TAG_member, name = "", baseType = #[[F64]], sizeInBits = 64, alignInBits = {{.*}}, offsetInBits = {{.*}}>
diff --git a/flang/test/Transforms/debug-vector-type.fir b/flang/test/Transforms/debug-vector-type.fir
index cfb97ea46ba61..9e41d90f407b9 100644
--- a/flang/test/Transforms/debug-vector-type.fir
+++ b/flang/test/Transforms/debug-vector-type.fir
@@ -2,22 +2,22 @@
 
 module {
 func.func private @foo1(%arg0: !fir.vector<20:bf16>)
-// CHECK-DAG: #[[F16:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real*2", sizeInBits = 16, encoding = DW_ATE_float>
-// CHECK-DAG: #llvm.di_composite_type<tag = DW_TAG_array_type, name = "vector real*2 (20)", baseType = #[[F16]], flags = Vector, sizeInBits = 320, elements = #llvm.di_subrange<count = 20 : i64>>
+// CHECK-DAG: #[[F16:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real(kind=2)", sizeInBits = 16, encoding = DW_ATE_float>
+// CHECK-DAG: #llvm.di_composite_type<tag = DW_TAG_array_type, name = "vector real(kind=2) (20)", baseType = #[[F16]], flags = Vector, sizeInBits = 320, elements = #llvm.di_subrange<count = 20 : i64>>
 
 func.func private @foo2(%arg0: !fir.vector<30:f32>)
 // CHECK-DAG: #[[F32:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real", sizeInBits = 32, encoding = DW_ATE_float>
 // CHECK-DAG: #llvm.di_composite_type<tag = DW_TAG_array_type, name = "vector real (30)", baseType = #[[F32]], flags = Vector, sizeInBits = 960, elements = #llvm.di_subrange<count = 30 : i64>>
 
 func.func private @foo3(%arg0: !fir.vector<10:f64>)
-// CHECK-DAG: #[[F64:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real*8", sizeInBits = 64, encoding = DW_ATE_float>
-// CHECK-DAG: #llvm.di_composite_type<tag = DW_TAG_array_type, name = "vector real*8 (10)", baseType = #[[F64]], flags = Vector, sizeInBits = 640, elements = #llvm.di_subrange<count = 10 : i64>>
+// CHECK-DAG: #[[F64:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real(kind=8)", sizeInBits = 64, encoding = DW_ATE_float>
+// CHECK-DAG: #llvm.di_composite_type<tag = DW_TAG_array_type, name = "vector real(kind=8) (10)", baseType = #[[F64]], flags = Vector, sizeInBits = 640, elements = #llvm.di_subrange<count = 10 : i64>>
 
 func.func private @foo4(%arg0: !fir.vector<5:i32>)
 // CHECK-DAG: #[[I32:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer", sizeInBits = 32, encoding = DW_ATE_signed>
 // CHECK-DAG: #llvm.di_composite_type<tag = DW_TAG_array_type, name = "vector integer (5)", baseType = #[[I32]], flags = Vector, sizeInBits = 160, elements = #llvm.di_subrange<count = 5 : i64>>
 
 func.func private @foo5(%arg0: !fir.vector<2:i64>)
-// CHECK-DAG: #[[I64:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer*8", sizeInBits = 64, encoding = DW_ATE_signed>
-// CHECK-DAG: #llvm.di_composite_type<tag = DW_TAG_array_type, name = "vector integer*8 (2)", baseType = #[[I64]], flags = Vector, sizeInBits = 128, elements = #llvm.di_subrange<count = 2 : i64>>
+// CHECK-DAG: #[[I64:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer(kind=8)", sizeInBits = 64, encoding = DW_ATE_signed>
+// CHECK-DAG: #llvm.di_composite_type<tag = DW_TAG_array_type, name = "vector integer(kind=8) (2)", baseType = #[[I64]], flags = Vector, sizeInBits = 128, elements = #llvm.di_subrange<count = 2 : i64>>
 }

>From 603da1c2f829ecb39d29f6d5e6a677caad996223 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 1 Oct 2025 10:11:41 +0200
Subject: [PATCH 23/48] [MemorySanitizer] Generate test checks for kmsan test
 (NFC)

---
 .../MemorySanitizer/msan_kernel_basic.ll      | 813 ++++++++++++------
 1 file changed, 538 insertions(+), 275 deletions(-)

diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_kernel_basic.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_kernel_basic.ll
index 4b7a910af08bf..a7209de32380a 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/msan_kernel_basic.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/msan_kernel_basic.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; KMSAN instrumentation tests
 ; RUN: opt < %s -msan-kernel=1 -S -passes=msan 2>&1 | FileCheck %s -check-prefixes=CHECK
 
@@ -6,309 +7,495 @@ target triple = "x86_64-unknown-linux-gnu"
 
 ; Check the instrumentation prologue.
 define void @Empty() nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @Empty(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_get_context_state()
+; CHECK-NEXT:    [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0
+; CHECK-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4
+; CHECK-NEXT:    [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5
+; CHECK-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    ret void
+;
 entry:
   ret void
 }
 
-; CHECK-LABEL: @Empty
-; CHECK: entry:
-; CHECK: @__msan_get_context_state()
-; %param_shadow:
-; CHECK: getelementptr {{.*}} i32 0, i32 0
-; %retval_shadow:
-; CHECK: getelementptr {{.*}} i32 0, i32 1
-; %va_arg_shadow:
-; CHECK: getelementptr {{.*}} i32 0, i32 2
-; %va_arg_origin:
-; CHECK: getelementptr {{.*}} i32 0, i32 3
-; %va_arg_overflow_size:
-; CHECK: getelementptr {{.*}} i32 0, i32 4
-; %param_origin:
-; CHECK: getelementptr {{.*}} i32 0, i32 5
-; %retval_origin:
-; CHECK: getelementptr {{.*}} i32 0, i32 6
-
 ; Check instrumentation of stores
-
 define void @Store1(ptr nocapture %p, i8 %x) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @Store1(
+; CHECK-SAME: ptr captures(none) [[P:%.*]], i8 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_get_context_state()
+; CHECK-NEXT:    [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0
+; CHECK-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4
+; CHECK-NEXT:    [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5
+; CHECK-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[TMP5]], 8
+; CHECK-NEXT:    [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[_MSARG1]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP8]], 8
+; CHECK-NEXT:    [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       [[BB11]]:
+; CHECK-NEXT:    call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT:    br label %[[BB12]]
+; CHECK:       [[BB12]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_1(ptr [[P]])
+; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0
+; CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1
+; CHECK-NEXT:    store i8 [[TMP7]], ptr [[TMP14]], align 1
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i8 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB18:.*]], !prof [[PROF1]]
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]])
+; CHECK-NEXT:    store i32 [[TMP17]], ptr [[TMP15]], align 4
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
+; CHECK-NEXT:    store i8 [[X]], ptr [[P]], align 1
+; CHECK-NEXT:    ret void
+;
 entry:
   store i8 %x, ptr %p
   ret void
 }
 
-; CHECK-LABEL: @Store1
-; CHECK: entry:
-; CHECK: @__msan_get_context_state()
-; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0
-; CHECK: [[BASE:%[0-9]+]] = ptrtoint {{.*}} [[PARAM_SHADOW]]
-; CHECK: [[SHADOW_PTR:%[a-z0-9_]+]] = inttoptr {{.*}} [[BASE]]
-; CHECK: [[SHADOW:%[a-z0-9]+]] = load i64, ptr [[SHADOW_PTR]]
-; CHECK: [[BASE2:%[0-9]+]] = ptrtoint {{.*}} [[PARAM_SHADOW]]
-; Load the shadow of %p and check it
-; CHECK: icmp ne i64 [[SHADOW]]
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; CHECK: @__msan_metadata_ptr_for_store_1(ptr %p)
-; CHECK: store i8
-; If the new shadow is non-zero, jump to __msan_chain_origin()
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; CHECK: @__msan_chain_origin
-; Storing origin here:
-; CHECK: store i32
-; CHECK: br label
-; CHECK: {{^[0-9]+}}:
-; CHECK: store i8
-; CHECK: ret void
-
 define void @Store2(ptr nocapture %p, i16 %x) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @Store2(
+; CHECK-SAME: ptr captures(none) [[P:%.*]], i16 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_get_context_state()
+; CHECK-NEXT:    [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0
+; CHECK-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4
+; CHECK-NEXT:    [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5
+; CHECK-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[TMP5]], 8
+; CHECK-NEXT:    [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr [[_MSARG1]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP8]], 8
+; CHECK-NEXT:    [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
+; CHECK:       [[BB11]]:
+; CHECK-NEXT:    call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]]
+; CHECK-NEXT:    br label %[[BB12]]
+; CHECK:       [[BB12]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_2(ptr [[P]])
+; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0
+; CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1
+; CHECK-NEXT:    store i16 [[TMP7]], ptr [[TMP14]], align 2
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i16 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB18:.*]], !prof [[PROF1]]
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]])
+; CHECK-NEXT:    store i32 [[TMP17]], ptr [[TMP15]], align 4
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
+; CHECK-NEXT:    store i16 [[X]], ptr [[P]], align 2
+; CHECK-NEXT:    ret void
+;
 entry:
   store i16 %x, ptr %p
   ret void
 }
 
-; CHECK-LABEL: @Store2
-; CHECK: entry:
-; CHECK: @__msan_get_context_state()
-; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0
-; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]]
-; Load the shadow of %p and check it
-; CHECK: load i64
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; CHECK: @__msan_metadata_ptr_for_store_2(ptr %p)
-; CHECK: store i16
-; If the new shadow is non-zero, jump to __msan_chain_origin()
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; CHECK: @__msan_chain_origin
-; Storing origin here:
-; CHECK: store i32
-; CHECK: br label
-; CHECK: {{^[0-9]+}}:
-; CHECK: store i16
-; CHECK: ret void
-
-
 define void @Store4(ptr nocapture %p, i32 %x) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @Store4(
+; CHECK-SAME: ptr captures(none) [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_get_context_state()
+; CHECK-NEXT:    [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0
+; CHECK-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4
+; CHECK-NEXT:    [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5
+; CHECK-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[TMP5]], 8
+; CHECK-NEXT:    [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[_MSARG1]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP8]], 8
+; CHECK-NEXT:    [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
+; CHECK:       [[BB11]]:
+; CHECK-NEXT:    call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]]
+; CHECK-NEXT:    br label %[[BB12]]
+; CHECK:       [[BB12]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_4(ptr [[P]])
+; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0
+; CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1
+; CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP14]], align 4
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB18:.*]], !prof [[PROF1]]
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]])
+; CHECK-NEXT:    store i32 [[TMP17]], ptr [[TMP15]], align 4
+; CHECK-NEXT:    br label %[[BB18]]
+; CHECK:       [[BB18]]:
+; CHECK-NEXT:    store i32 [[X]], ptr [[P]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   store i32 %x, ptr %p
   ret void
 }
 
-; CHECK-LABEL: @Store4
-; CHECK: entry:
-; CHECK: @__msan_get_context_state()
-; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0
-; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]]
-; Load the shadow of %p and check it
-; CHECK: load i32
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; CHECK: @__msan_metadata_ptr_for_store_4(ptr %p)
-; CHECK: store i32
-; If the new shadow is non-zero, jump to __msan_chain_origin()
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; CHECK: @__msan_chain_origin
-; Storing origin here:
-; CHECK: store i32
-; CHECK: br label
-; CHECK: {{^[0-9]+}}:
-; CHECK: store i32
-; CHECK: ret void
-
 define void @Store8(ptr nocapture %p, i64 %x) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @Store8(
+; CHECK-SAME: ptr captures(none) [[P:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_get_context_state()
+; CHECK-NEXT:    [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0
+; CHECK-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4
+; CHECK-NEXT:    [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5
+; CHECK-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[TMP5]], 8
+; CHECK-NEXT:    [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[TMP7:%.*]] = load i64, ptr [[_MSARG1]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP8]], 8
+; CHECK-NEXT:    [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
+; CHECK:       [[BB11]]:
+; CHECK-NEXT:    call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]]
+; CHECK-NEXT:    br label %[[BB12]]
+; CHECK:       [[BB12]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_8(ptr [[P]])
+; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0
+; CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1
+; CHECK-NEXT:    store i64 [[TMP7]], ptr [[TMP14]], align 8
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB21:.*]], !prof [[PROF1]]
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]])
+; CHECK-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+; CHECK-NEXT:    [[TMP19:%.*]] = shl i64 [[TMP18]], 32
+; CHECK-NEXT:    [[TMP20:%.*]] = or i64 [[TMP18]], [[TMP19]]
+; CHECK-NEXT:    store i64 [[TMP20]], ptr [[TMP15]], align 8
+; CHECK-NEXT:    br label %[[BB21]]
+; CHECK:       [[BB21]]:
+; CHECK-NEXT:    store i64 [[X]], ptr [[P]], align 8
+; CHECK-NEXT:    ret void
+;
 entry:
   store i64 %x, ptr %p
   ret void
 }
 
-; CHECK-LABEL: @Store8
-; CHECK: entry:
-; CHECK: @__msan_get_context_state()
-; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0
-; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]]
-; Load the shadow of %p and check it
-; CHECK: load i64
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; CHECK: @__msan_metadata_ptr_for_store_8(ptr %p)
-; CHECK: store i64
-; If the new shadow is non-zero, jump to __msan_chain_origin()
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; CHECK: @__msan_chain_origin
-; Storing origin here:
-; CHECK: store i64
-; CHECK: br label
-; CHECK: {{^[0-9]+}}:
-; CHECK: store i64
-; CHECK: ret void
-
 define void @Store16(ptr nocapture %p, i128 %x) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @Store16(
+; CHECK-SAME: ptr captures(none) [[P:%.*]], i128 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_get_context_state()
+; CHECK-NEXT:    [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0
+; CHECK-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4
+; CHECK-NEXT:    [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5
+; CHECK-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[TMP5]], 8
+; CHECK-NEXT:    [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[TMP7:%.*]] = load i128, ptr [[_MSARG1]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP8]], 8
+; CHECK-NEXT:    [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
+; CHECK:       [[BB11]]:
+; CHECK-NEXT:    call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]]
+; CHECK-NEXT:    br label %[[BB12]]
+; CHECK:       [[BB12]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_n(ptr [[P]], i64 16)
+; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0
+; CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1
+; CHECK-NEXT:    store i128 [[TMP7]], ptr [[TMP14]], align 8
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB22:.*]], !prof [[PROF1]]
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]])
+; CHECK-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
+; CHECK-NEXT:    [[TMP19:%.*]] = shl i64 [[TMP18]], 32
+; CHECK-NEXT:    [[TMP20:%.*]] = or i64 [[TMP18]], [[TMP19]]
+; CHECK-NEXT:    store i64 [[TMP20]], ptr [[TMP15]], align 8
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i64, ptr [[TMP15]], i32 1
+; CHECK-NEXT:    store i64 [[TMP20]], ptr [[TMP21]], align 8
+; CHECK-NEXT:    br label %[[BB22]]
+; CHECK:       [[BB22]]:
+; CHECK-NEXT:    store i128 [[X]], ptr [[P]], align 8
+; CHECK-NEXT:    ret void
+;
 entry:
   store i128 %x, ptr %p
   ret void
 }
 
-; CHECK-LABEL: @Store16
-; CHECK: entry:
-; CHECK: @__msan_get_context_state()
-; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0
-; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]]
-; Load the shadow of %p and check it
-; CHECK: load i64
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; CHECK: @__msan_metadata_ptr_for_store_n(ptr %p, i64 16)
-; CHECK: store i128
-; If the new shadow is non-zero, jump to __msan_chain_origin()
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; CHECK: @__msan_chain_origin
-; Storing origin here:
-; CHECK: store i64
-; CHECK: br label
-; CHECK: {{^[0-9]+}}:
-; CHECK: store i128
-; CHECK: ret void
-
-
 ; Check instrumentation of loads
 
 define i8 @Load1(ptr nocapture %p) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define i8 @Load1(
+; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_get_context_state()
+; CHECK-NEXT:    [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0
+; CHECK-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4
+; CHECK-NEXT:    [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5
+; CHECK-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
+; CHECK:       [[BB5]]:
+; CHECK-NEXT:    call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]]
+; CHECK-NEXT:    br label %[[BB6]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT:    [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_1(ptr [[P]])
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i8, ptr [[TMP9]], align 1
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+; CHECK-NEXT:    store i8 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8
+; CHECK-NEXT:    store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4
+; CHECK-NEXT:    ret i8 [[TMP7]]
+;
 entry:
   %0 = load i8, ptr %p
   ret i8 %0
 }
 
-; CHECK-LABEL: @Load1
-; CHECK: entry:
-; CHECK: @__msan_get_context_state()
-; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0
-; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]]
-; Load the shadow of %p and check it
-; CHECK: load i64
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; Load the value from %p. This is done before accessing the shadow
-; to ease atomic handling.
-; CHECK: load i8
-; CHECK: @__msan_metadata_ptr_for_load_1(ptr %p)
-; Load the shadow and origin.
-; CHECK: load i8
-; CHECK: load i32
-
-
 define i16 @Load2(ptr nocapture %p) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define i16 @Load2(
+; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_get_context_state()
+; CHECK-NEXT:    [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0
+; CHECK-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4
+; CHECK-NEXT:    [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5
+; CHECK-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
+; CHECK:       [[BB5]]:
+; CHECK-NEXT:    call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]]
+; CHECK-NEXT:    br label %[[BB6]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr [[P]], align 2
+; CHECK-NEXT:    [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_2(ptr [[P]])
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i16, ptr [[TMP9]], align 2
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+; CHECK-NEXT:    store i16 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8
+; CHECK-NEXT:    store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4
+; CHECK-NEXT:    ret i16 [[TMP7]]
+;
 entry:
   %0 = load i16, ptr %p
   ret i16 %0
 }
 
-; CHECK-LABEL: @Load2
-; CHECK: entry:
-; CHECK: @__msan_get_context_state()
-; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0
-; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]]
-; Load the shadow of %p and check it
-; CHECK: load i64
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; Load the value from %p. This is done before accessing the shadow
-; to ease atomic handling.
-; CHECK: load i16
-; CHECK: @__msan_metadata_ptr_for_load_2(ptr %p)
-; Load the shadow and origin.
-; CHECK: load i16
-; CHECK: load i32
-
-
 define i32 @Load4(ptr nocapture %p) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define i32 @Load4(
+; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_get_context_state()
+; CHECK-NEXT:    [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0
+; CHECK-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4
+; CHECK-NEXT:    [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5
+; CHECK-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
+; CHECK:       [[BB5]]:
+; CHECK-NEXT:    call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]]
+; CHECK-NEXT:    br label %[[BB6]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_4(ptr [[P]])
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+; CHECK-NEXT:    store i32 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8
+; CHECK-NEXT:    store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4
+; CHECK-NEXT:    ret i32 [[TMP7]]
+;
 entry:
   %0 = load i32, ptr %p
   ret i32 %0
 }
 
-; CHECK-LABEL: @Load4
-; CHECK: entry:
-; CHECK: @__msan_get_context_state()
-; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0
-; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]]
-; Load the shadow of %p and check it
-; CHECK: load i64
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; Load the value from %p. This is done before accessing the shadow
-; to ease atomic handling.
-; CHECK: load i32
-; CHECK: @__msan_metadata_ptr_for_load_4(ptr %p)
-; Load the shadow and origin.
-; CHECK: load i32
-; CHECK: load i32
-
 define i64 @Load8(ptr nocapture %p) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define i64 @Load8(
+; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_get_context_state()
+; CHECK-NEXT:    [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0
+; CHECK-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4
+; CHECK-NEXT:    [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5
+; CHECK-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
+; CHECK:       [[BB5]]:
+; CHECK-NEXT:    call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]]
+; CHECK-NEXT:    br label %[[BB6]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_8(ptr [[P]])
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 8
+; CHECK-NEXT:    store i64 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8
+; CHECK-NEXT:    store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4
+; CHECK-NEXT:    ret i64 [[TMP7]]
+;
 entry:
   %0 = load i64, ptr %p
   ret i64 %0
 }
 
-; CHECK-LABEL: @Load8
-; CHECK: entry:
-; CHECK: @__msan_get_context_state()
-; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0
-; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]]
-; Load the shadow of %p and check it
-; CHECK: load i64
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; Load the value from %p. This is done before accessing the shadow
-; to ease atomic handling.
-; CHECK: load i64
-; CHECK: @__msan_metadata_ptr_for_load_8(ptr %p)
-; Load the shadow and origin.
-; CHECK: load i64
-; CHECK: load i32
-
 define i128 @Load16(ptr nocapture %p) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define i128 @Load16(
+; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_get_context_state()
+; CHECK-NEXT:    [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0
+; CHECK-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4
+; CHECK-NEXT:    [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5
+; CHECK-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
+; CHECK:       [[BB5]]:
+; CHECK-NEXT:    call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]]
+; CHECK-NEXT:    br label %[[BB6]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = load i128, ptr [[P]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_n(ptr [[P]], i64 16)
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i128, ptr [[TMP9]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 8
+; CHECK-NEXT:    store i128 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8
+; CHECK-NEXT:    store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4
+; CHECK-NEXT:    ret i128 [[TMP7]]
+;
 entry:
   %0 = load i128, ptr %p
   ret i128 %0
 }
 
-; CHECK-LABEL: @Load16
-; CHECK: entry:
-; CHECK: @__msan_get_context_state()
-; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0
-; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]]
-; Load the shadow of %p and check it
-; CHECK: load i64
-; CHECK: icmp
-; CHECK: br i1
-; CHECK: {{^[0-9]+}}:
-; Load the value from %p. This is done before accessing the shadow
-; to ease atomic handling.
-; CHECK: load i128
-; CHECK: @__msan_metadata_ptr_for_load_n(ptr %p, i64 16)
-; Load the shadow and origin.
-; CHECK: load i128
-; CHECK: load i32
-
-
 ; Test kernel-specific va_list instrumentation
 
 %struct.__va_list_tag = type { i32, i32, ptr, ptr }
@@ -319,6 +506,78 @@ declare dso_local i32 @VAListFn(ptr, ptr) local_unnamed_addr
 
 ; Function Attrs: nounwind uwtable
 define dso_local i32 @VarArgFn(ptr %fmt, ...) local_unnamed_addr sanitize_memory #0 {
+; CHECK-LABEL: define dso_local i32 @VarArgFn(
+; CHECK-SAME: ptr [[FMT:%.*]], ...) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_get_context_state()
+; CHECK-NEXT:    [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0
+; CHECK-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4
+; CHECK-NEXT:    [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5
+; CHECK-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i64, ptr [[VA_ARG_OVERFLOW_SIZE]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 48, [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = alloca i8, i64 [[TMP6]], align 8
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP7]], i8 0, i64 [[TMP6]], i1 false)
+; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP6]], i64 800)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP7]], ptr align 8 [[VA_ARG_SHADOW]], i64 [[TMP8]], i1 false)
+; CHECK-NEXT:    [[TMP9:%.*]] = alloca i8, i64 [[TMP6]], align 8
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[VA_ARG_ORIGIN]], i64 [[TMP8]], i1 false)
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT:    call void @__msan_poison_alloca(ptr [[ARGS]], i64 24, ptr @[[GLOB0:[0-9]+]])
+; CHECK-NEXT:    [[TMP10:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_1(ptr [[ARGS]])
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { ptr, ptr } [[TMP10]], 0
+; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { ptr, ptr } [[TMP10]], 1
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP11]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    [[TMP13:%.*]] = ptrtoint ptr [[ARGS]] to i64
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[TMP13]], 16
+; CHECK-NEXT:    [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr
+; CHECK-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
+; CHECK-NEXT:    [[TMP17:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_1(ptr [[TMP16]])
+; CHECK-NEXT:    [[TMP18:%.*]] = extractvalue { ptr, ptr } [[TMP17]], 0
+; CHECK-NEXT:    [[TMP19:%.*]] = extractvalue { ptr, ptr } [[TMP17]], 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP18]], ptr align 16 [[TMP7]], i64 48, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP19]], ptr align 16 [[TMP9]], i64 48, i1 false)
+; CHECK-NEXT:    [[TMP20:%.*]] = ptrtoint ptr [[ARGS]] to i64
+; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[TMP20]], 8
+; CHECK-NEXT:    [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr
+; CHECK-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8
+; CHECK-NEXT:    [[TMP24:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_1(ptr [[TMP23]])
+; CHECK-NEXT:    [[TMP25:%.*]] = extractvalue { ptr, ptr } [[TMP24]], 0
+; CHECK-NEXT:    [[TMP26:%.*]] = extractvalue { ptr, ptr } [[TMP24]], 1
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr i8, ptr [[TMP7]], i32 48
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP25]], ptr align 16 [[TMP27]], i64 [[TMP5]], i1 false)
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[TMP9]], i32 48
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP26]], ptr align 16 [[TMP28]], i64 [[TMP5]], i1 false)
+; CHECK-NEXT:    [[TMP29:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[_MSARG1:%.*]] = inttoptr i64 [[TMP29]] to ptr
+; CHECK-NEXT:    store i64 [[TMP2]], ptr [[_MSARG1]], align 8
+; CHECK-NEXT:    [[TMP30:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64
+; CHECK-NEXT:    [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP30]] to ptr
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[_MSARG_O2]], align 4
+; CHECK-NEXT:    [[TMP31:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[TMP32:%.*]] = add i64 [[TMP31]], 8
+; CHECK-NEXT:    [[_MSARG3:%.*]] = inttoptr i64 [[TMP32]] to ptr
+; CHECK-NEXT:    store i64 0, ptr [[_MSARG3]], align 8
+; CHECK-NEXT:    store i32 0, ptr [[RETVAL_SHADOW]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @VAListFn(ptr [[FMT]], ptr nonnull [[ARGS]])
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr [[RETVAL_SHADOW]], align 8
+; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[RETVAL_ORIGIN]], align 4
+; CHECK-NEXT:    call void @llvm.va_end.p0(ptr nonnull [[ARGS]])
+; CHECK-NEXT:    store i32 [[_MSRET]], ptr [[RETVAL_SHADOW]], align 8
+; CHECK-NEXT:    store i32 [[TMP33]], ptr [[RETVAL_ORIGIN]], align 4
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
 entry:
   %args = alloca [1 x %struct.__va_list_tag], align 16
   call void @llvm.va_start(ptr nonnull %args)
@@ -330,52 +589,56 @@ entry:
 ; Kernel is built without SSE support.
 attributes #0 = { "target-features"="+fxsr,+x87,-sse" }
 
-; CHECK-LABEL: @VarArgFn
-; CHECK: @__msan_get_context_state()
-; CHECK: [[VA_ARG_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 2
-; CHECK: [[VA_ARG_ORIGIN:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 3
-; CHECK: [[VA_ARG_OVERFLOW_SIZE:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 4
 
-; CHECK: [[OSIZE:%[0-9]+]] = load i64, ptr [[VA_ARG_OVERFLOW_SIZE]]
 ; Register save area is 48 bytes for non-SSE builds.
-; CHECK: [[SIZE:%[0-9]+]] = add i64 48, [[OSIZE]]
-; CHECK: [[SHADOWS:%[0-9]+]] = alloca i8, i64 [[SIZE]]
-; CHECK: call void @llvm.memset{{.*}}(ptr align 8 [[SHADOWS]], i8 0, i64 [[SIZE]], i1 false)
-; CHECK: [[COPYSZ:%[0-9]+]] = call i64 @llvm.umin.i64(i64 [[SIZE]], i64 800)
-; CHECK: call void @llvm.memcpy{{.*}}(ptr align 8 [[SHADOWS]], ptr align 8 [[VA_ARG_SHADOW]], i64 [[COPYSZ]]
-; CHECK: [[ORIGINS:%[0-9]+]] = alloca i8, i64 [[SIZE]]
-; CHECK: call void @llvm.memcpy{{.*}}(ptr align 8 [[ORIGINS]], ptr align 8 [[VA_ARG_ORIGIN]], i64 [[COPYSZ]]
-; CHECK: call i32 @VAListFn
 
 ; Function Attrs: nounwind uwtable
 define dso_local void @VarArgCaller() local_unnamed_addr sanitize_memory {
+; CHECK-LABEL: define dso_local void @VarArgCaller(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_get_context_state()
+; CHECK-NEXT:    [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0
+; CHECK-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT:    [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2
+; CHECK-NEXT:    [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3
+; CHECK-NEXT:    [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4
+; CHECK-NEXT:    [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5
+; CHECK-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    store i64 0, ptr [[_MSARG]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], 8
+; CHECK-NEXT:    [[_MSARG1:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    store i32 0, ptr [[_MSARG1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[VA_ARG_SHADOW]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSARG_VA_S:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[VA_ARG_ORIGIN]] to i64
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 0
+; CHECK-NEXT:    [[_MSARG_VA_O:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[VA_ARG_SHADOW]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP8]], 8
+; CHECK-NEXT:    [[_MSARG_VA_S2:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[VA_ARG_ORIGIN]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], 8
+; CHECK-NEXT:    [[_MSARG_VA_O3:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT:    store i32 0, ptr [[_MSARG_VA_S2]], align 8
+; CHECK-NEXT:    store i32 0, ptr [[_MSARG_VA_O3]], align 8
+; CHECK-NEXT:    store i64 0, ptr [[VA_ARG_OVERFLOW_SIZE]], align 8
+; CHECK-NEXT:    store i32 0, ptr [[RETVAL_SHADOW]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (ptr, ...) @VarArgFn(ptr @.str, i32 123)
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr [[RETVAL_SHADOW]], align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[RETVAL_ORIGIN]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   %call = tail call i32 (ptr, ...) @VarArgFn(ptr @.str, i32 123)
   ret void
 }
 
-; CHECK-LABEL: @VarArgCaller
-
-; CHECK: entry:
-; CHECK: @__msan_get_context_state()
-; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0
-; CHECK: [[VA_ARG_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 2
-; CHECK: [[VA_ARG_OVERFLOW_SIZE:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 4
-
-; CHECK: [[PARAM_SI:%[_a-z0-9]+]] = ptrtoint {{.*}} [[PARAM_SHADOW]]
-; CHECK: [[ARG1_S:%[_a-z0-9]+]] = inttoptr i64 [[PARAM_SI]] to ptr
-; First argument is initialized
-; CHECK: store i64 0, ptr [[ARG1_S]]
-
-; Dangling cast of va_arg_shadow[0], unused because the first argument is fixed.
-; CHECK: [[VA_CAST0:%[_a-z0-9]+]] = ptrtoint {{.*}} [[VA_ARG_SHADOW]] to i64
-
-; CHECK: [[VA_CAST1:%[_a-z0-9]+]] = ptrtoint {{.*}} [[VA_ARG_SHADOW]] to i64
-; CHECK: [[ARG1_SI:%[_a-z0-9]+]] = add i64 [[VA_CAST1]], 8
-; CHECK: [[PARG1_S:%[_a-z0-9]+]] = inttoptr i64 [[ARG1_SI]] to ptr
-
-; Shadow for 123 is 0.
-; CHECK: store i32 0, ptr [[ARG1_S]]
-
-; CHECK: store i64 0, ptr [[VA_ARG_OVERFLOW_SIZE]]
-; CHECK: call i32 (ptr, ...) @VarArgFn({{.*}} @.str{{.*}} i32 123)
+;.
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.

>From e6b32225717d3d6d7cbe90b3e2c17f6524c3902b Mon Sep 17 00:00:00 2001
From: mikael-nilsson-arm <mikael.nilsson at arm.com>
Date: Wed, 1 Oct 2025 10:13:42 +0200
Subject: [PATCH 24/48] [InstCombine] Opt phi(freeze(undef), C) -> phi(C, C)
 (#161181)

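When freeze(undef) is an incoming value of a PHI whose remaining incoming
values are all the same constant (and that constant is not undef or
poison), choose that constant for the freeze so that the PHI can be
replaced by the constant.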
---
 .../InstCombine/InstructionCombining.cpp      |  31 +-
 .../Transforms/InstCombine/in-freeze-phi.ll   | 274 ++++++++++++++++++
 2 files changed, 303 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/in-freeze-phi.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 8fbaf68dfcc43..ff063f929347f 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -5169,6 +5169,7 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
   // - or: pick -1
   // - select's condition: if the true value is constant, choose it by making
   //                       the condition true.
+  // - phi: pick the common constant across operands
   // - default: pick 0
   //
   // Note that this transform is intentionally done here rather than
@@ -5179,9 +5180,32 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
   // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
   //       duplicating logic for binops at least.
   auto getUndefReplacement = [&](Type *Ty) {
-    Value *BestValue = nullptr;
+    auto pickCommonConstantFromPHI = [](PHINode &PN) -> Value * {
+      // phi(freeze(undef), C, C). Choose C for freeze so the PHI can be
+      // removed.
+      Constant *BestValue = nullptr;
+      for (Value *V : PN.incoming_values()) {
+        if (match(V, m_Freeze(m_Undef())))
+          continue;
+
+        Constant *C = dyn_cast<Constant>(V);
+        if (!C)
+          return nullptr;
+
+        if (!isGuaranteedNotToBeUndefOrPoison(C))
+          return nullptr;
+
+        if (BestValue && BestValue != C)
+          return nullptr;
+
+        BestValue = C;
+      }
+      return BestValue;
+    };
+
     Value *NullValue = Constant::getNullValue(Ty);
-    for (const auto *U : I.users()) {
+    Value *BestValue = nullptr;
+    for (auto *U : I.users()) {
       Value *V = NullValue;
       if (match(U, m_Or(m_Value(), m_Value())))
         V = ConstantInt::getAllOnesValue(Ty);
@@ -5190,6 +5214,9 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
       else if (match(U, m_c_Select(m_Specific(&I), m_Value(V)))) {
         if (!isGuaranteedNotToBeUndefOrPoison(V, &AC, &I, &DT))
           V = NullValue;
+      } else if (auto *PHI = dyn_cast<PHINode>(U)) {
+        if (Value *MaybeV = pickCommonConstantFromPHI(*PHI))
+          V = MaybeV;
       }
 
       if (!BestValue)
diff --git a/llvm/test/Transforms/InstCombine/in-freeze-phi.ll b/llvm/test/Transforms/InstCombine/in-freeze-phi.ll
new file mode 100644
index 0000000000000..917d81b499c49
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/in-freeze-phi.ll
@@ -0,0 +1,274 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+define i32 @phi_freeze_same_consts(i1 %c0, i1 %c1) {
+; CHECK-LABEL: define i32 @phi_freeze_same_consts(
+; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]]
+; CHECK:       [[BB_FREEZE]]:
+; CHECK-NEXT:    br label %[[FINAL:.*]]
+; CHECK:       [[BB_OTHER]]:
+; CHECK-NEXT:    br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]]
+; CHECK:       [[CA]]:
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[CB]]:
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[FINAL]]:
+; CHECK-NEXT:    ret i32 42
+;
+entry:
+  br i1 %c0, label %bb_freeze, label %bb_other
+
+bb_freeze:
+  %f = freeze i32 undef
+  br label %final
+
+bb_other:
+  br i1 %c1, label %cA, label %cB
+cA:
+  br label %final
+cB:
+  br label %final
+
+final:
+  %phi = phi i32 [ %f, %bb_freeze ], [ 42, %cA ], [ 42, %cB ]
+  ret i32 %phi
+}
+
+define i32 @phi_freeze_mixed_consts(i1 %c0, i1 %c1) {
+; CHECK-LABEL: define i32 @phi_freeze_mixed_consts(
+; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]]
+; CHECK:       [[BB_FREEZE]]:
+; CHECK-NEXT:    br label %[[FINAL:.*]]
+; CHECK:       [[BB_OTHER]]:
+; CHECK-NEXT:    br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]]
+; CHECK:       [[CA]]:
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[CB]]:
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[FINAL]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[BB_FREEZE]] ], [ 42, %[[CA]] ], [ 7, %[[CB]] ]
+; CHECK-NEXT:    ret i32 [[PHI]]
+;
+entry:
+  br i1 %c0, label %bb_freeze, label %bb_other
+
+bb_freeze:
+  %f = freeze i32 undef
+  br label %final
+
+bb_other:
+  br i1 %c1, label %cA, label %cB
+cA:
+  br label %final
+cB:
+  br label %final
+
+final:
+  %phi = phi i32 [ %f, %bb_freeze ], [ 42, %cA ], [ 7, %cB ]
+  ret i32 %phi
+}
+
+define i32 @phi_freeze_with_nonconst_incoming(i32 %x, i1 %c0, i1 %c1) {
+; CHECK-LABEL: define i32 @phi_freeze_with_nonconst_incoming(
+; CHECK-SAME: i32 [[X:%.*]], i1 [[C0:%.*]], i1 [[C1:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]]
+; CHECK:       [[BB_FREEZE]]:
+; CHECK-NEXT:    br label %[[FINAL:.*]]
+; CHECK:       [[BB_OTHER]]:
+; CHECK-NEXT:    br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]]
+; CHECK:       [[CA]]:
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[CB]]:
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[FINAL]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[BB_FREEZE]] ], [ [[X]], %[[CA]] ], [ 13, %[[CB]] ]
+; CHECK-NEXT:    ret i32 [[PHI]]
+;
+entry:
+  br i1 %c0, label %bb_freeze, label %bb_other
+
+bb_freeze:
+  %f = freeze i32 undef
+  br label %final
+
+bb_other:
+  br i1 %c1, label %cA, label %cB
+cA:
+  br label %final
+cB:
+  br label %final
+
+final:
+  %phi = phi i32 [ %f, %bb_freeze ], [ %x, %cA ], [ 13, %cB ]
+  ret i32 %phi
+}
+
+define <4 x i8> @phi_freeze_vector(i1 %c0, i1 %c1) {
+; CHECK-LABEL: define <4 x i8> @phi_freeze_vector(
+; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]]
+; CHECK:       [[BB_FREEZE]]:
+; CHECK-NEXT:    br label %[[FINAL:.*]]
+; CHECK:       [[BB_OTHER]]:
+; CHECK-NEXT:    br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]]
+; CHECK:       [[CA]]:
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[CB]]:
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[FINAL]]:
+; CHECK-NEXT:    ret <4 x i8> splat (i8 9)
+;
+entry:
+  br i1 %c0, label %bb_freeze, label %bb_other
+
+bb_freeze:
+  %f = freeze <4 x i8> undef
+  br label %final
+
+bb_other:
+  br i1 %c1, label %cA, label %cB
+
+cA:
+  br label %final
+
+cB:
+  br label %final
+
+final:
+  %phi = phi <4 x i8> [ %f, %bb_freeze ],
+  [<i8 9, i8 9, i8 9, i8 9>, %cA ],
+  [<i8 9, i8 9, i8 9, i8 9>, %cB ]
+  ret <4 x i8> %phi
+}
+
+define i32 @multi_use_one_folds_one_not_zero(i1 %c0, i1 %c1, i1 %c2) {
+; CHECK-LABEL: define i32 @multi_use_one_folds_one_not_zero(
+; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C0]], label %[[BB_OTHER3:.*]], label %[[CC1:.*]]
+; CHECK:       [[BB_OTHER3]]:
+; CHECK-NEXT:    br label %[[MID:.*]]
+; CHECK:       [[CC1]]:
+; CHECK-NEXT:    br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]]
+; CHECK:       [[CA]]:
+; CHECK-NEXT:    br label %[[MID]]
+; CHECK:       [[CB]]:
+; CHECK-NEXT:    br label %[[MID]]
+; CHECK:       [[MID]]:
+; CHECK-NEXT:    [[PHI_FOLD:%.*]] = phi i32 [ 0, %[[BB_OTHER3]] ], [ 1, %[[CA]] ], [ 1, %[[CB]] ]
+; CHECK-NEXT:    br i1 [[C2]], label %[[BB_FREEZE2:.*]], label %[[CD:.*]]
+; CHECK:       [[BB_FREEZE2]]:
+; CHECK-NEXT:    br label %[[FINAL:.*]]
+; CHECK:       [[BB_OTHER2:.*:]]
+; CHECK-NEXT:    br i1 true, label %[[CA]], label %[[CB]]
+; CHECK:       [[CC:.*:]]
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[CD]]:
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[FINAL]]:
+; CHECK-NEXT:    ret i32 [[PHI_FOLD]]
+;
+entry:
+  %f = freeze i32 undef
+  br i1 %c0, label %bb_freeze, label %bb_other
+bb_freeze:
+  br label %mid
+bb_other:
+  br i1 %c1, label %cA, label %cB
+cA:
+  br label %mid
+cB:
+  br label %mid
+mid:
+  %phi_no_fold  = phi i32 [ %f, %bb_freeze ], [ 1, %cA ], [ 1, %cB ]
+  br i1 %c2, label %bb_freeze2, label %cD
+bb_freeze2:
+  br label %final
+bb_other2:
+  br i1 %c1, label %cA, label %cB
+cC:
+  br label %final
+cD:
+  br label %final
+final:
+  %phi_fold  = phi i32 [ %f, %bb_freeze2 ], [ 0, %cC ], [ 0, %cD ]
+  %a = add i32 %phi_fold, %phi_no_fold
+  ret i32 %a
+}
+
+define i32 @phi_freeze_poison(i1 %c0, i1 %c1) {
+; CHECK-LABEL: define i32 @phi_freeze_poison(
+; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]]
+; CHECK:       [[BB_FREEZE]]:
+; CHECK-NEXT:    br label %[[FINAL:.*]]
+; CHECK:       [[BB_OTHER]]:
+; CHECK-NEXT:    br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]]
+; CHECK:       [[CA]]:
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[CB]]:
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[FINAL]]:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br i1 %c0, label %bb_freeze, label %bb_other
+
+bb_freeze:
+  %f = freeze i32 undef
+  br label %final
+
+bb_other:
+  br i1 %c1, label %cA, label %cB
+cA:
+  br label %final
+cB:
+  br label %final
+
+final:
+  %phi = phi i32 [ %f, %bb_freeze ], [ poison, %cA ], [ poison, %cB ]
+  ret i32 %phi
+}
+
+define <2 x i32> @phi_freeze_poison_vec(i1 %c0, i1 %c1) {
+; CHECK-LABEL: define <2 x i32> @phi_freeze_poison_vec(
+; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]]
+; CHECK:       [[BB_FREEZE]]:
+; CHECK-NEXT:    br label %[[FINAL:.*]]
+; CHECK:       [[BB_OTHER]]:
+; CHECK-NEXT:    br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]]
+; CHECK:       [[CA]]:
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[CB]]:
+; CHECK-NEXT:    br label %[[FINAL]]
+; CHECK:       [[FINAL]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB_FREEZE]] ], [ <i32 poison, i32 1>, %[[CA]] ], [ <i32 poison, i32 1>, %[[CB]] ]
+; CHECK-NEXT:    ret <2 x i32> [[PHI]]
+;
+entry:
+  br i1 %c0, label %bb_freeze, label %bb_other
+
+bb_freeze:
+  %f = freeze <2 x i32> undef
+  br label %final
+
+bb_other:
+  br i1 %c1, label %cA, label %cB
+cA:
+  br label %final
+cB:
+  br label %final
+
+final:
+  %phi = phi <2 x i32> [ %f, %bb_freeze ], [ <i32 poison, i32 1>, %cA ], [ <i32 poison, i32 1>, %cB ]
+  ret <2 x i32> %phi
+}

>From 168616648f553b4b9cf826f8e14d6a948d978bf8 Mon Sep 17 00:00:00 2001
From: Muhammad Bassiouni <60100307+bassiounix at users.noreply.github.com>
Date: Wed, 1 Oct 2025 11:21:19 +0300
Subject: [PATCH 25/48] [libc][math] Refactor exp10m1f16 implementation to
 header-only in src/__support/math folder. (#161119)

Part of #147386

in preparation for: https://discourse.llvm.org/t/rfc-make-clang-builtin-math-functions-constexpr-with-llvm-libc-to-support-c-23-constexpr-math-functions/86450
---
 libc/shared/math.h                            |   1 +
 libc/shared/math/exp10m1f16.h                 |  29 +++
 libc/src/__support/math/CMakeLists.txt        |  17 ++
 libc/src/__support/math/exp10m1f16.h          | 185 ++++++++++++++++++
 libc/src/math/generic/CMakeLists.txt          |  13 +-
 libc/src/math/generic/exp10m1f16.cpp          | 158 +--------------
 libc/test/shared/CMakeLists.txt               |   1 +
 libc/test/shared/shared_math_test.cpp         |   1 +
 .../llvm-project-overlay/libc/BUILD.bazel     |  18 +-
 9 files changed, 254 insertions(+), 169 deletions(-)
 create mode 100644 libc/shared/math/exp10m1f16.h
 create mode 100644 libc/src/__support/math/exp10m1f16.h

diff --git a/libc/shared/math.h b/libc/shared/math.h
index cccd6a375930e..4b2a0d8c712ad 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -46,6 +46,7 @@
 #include "math/exp10f.h"
 #include "math/exp10f16.h"
 #include "math/exp10m1f.h"
+#include "math/exp10m1f16.h"
 #include "math/expf.h"
 #include "math/expf16.h"
 #include "math/frexpf.h"
diff --git a/libc/shared/math/exp10m1f16.h b/libc/shared/math/exp10m1f16.h
new file mode 100644
index 0000000000000..5f18f2986207e
--- /dev/null
+++ b/libc/shared/math/exp10m1f16.h
@@ -0,0 +1,29 @@
+//===-- Shared exp10m1f16 function ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXP10M1F16_H
+#define LLVM_LIBC_SHARED_MATH_EXP10M1F16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+#include "shared/libc_common.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/math/exp10m1f16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::exp10m1f16;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_EXP10M1F16_H
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 84c1b15498672..98f9bb42f91f4 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -498,6 +498,23 @@ add_header_library(
     libc.src.__support.macros.optimization
 )
 
+add_header_library(
+  exp10m1f16
+  HDRS
+    exp10m1f16.h
+  DEPENDS
+    .exp10f16_utils
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.cpu_features
+)
+
 add_header_library(
   erff
   HDRS
diff --git a/libc/src/__support/math/exp10m1f16.h b/libc/src/__support/math/exp10m1f16.h
new file mode 100644
index 0000000000000..6367a857fa98a
--- /dev/null
+++ b/libc/src/__support/math/exp10m1f16.h
@@ -0,0 +1,185 @@
+//===-- Implementation header for exp10m1f16 --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "exp10f16_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/macros/properties/cpu_features.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float16 exp10m1f16(float16 x) {
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  constexpr fputil::ExceptValues<float16, 3> EXP10M1F16_EXCEPTS_LO = {{
+      // (input, RZ output, RU offset, RD offset, RN offset)
+      // x = 0x1.5c4p-4, exp10m1f16(x) = 0x1.bacp-3 (RZ)
+      {0x2d71U, 0x32ebU, 1U, 0U, 0U},
+      // x = -0x1.5ep-13, exp10m1f16(x) = -0x1.92cp-12 (RZ)
+      {0x8978U, 0x8e4bU, 0U, 1U, 0U},
+      // x = -0x1.e2p-10, exp10m1f16(x) = -0x1.14cp-8 (RZ)
+      {0x9788U, 0x9c53U, 0U, 1U, 0U},
+  }};
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+  constexpr size_t N_EXP10M1F16_EXCEPTS_HI = 3;
+#else
+  constexpr size_t N_EXP10M1F16_EXCEPTS_HI = 6;
+#endif
+
+  constexpr fputil::ExceptValues<float16, N_EXP10M1F16_EXCEPTS_HI>
+      EXP10M1F16_EXCEPTS_HI = {{
+          // (input, RZ output, RU offset, RD offset, RN offset)
+          // x = 0x1.8f4p-2, exp10m1f16(x) = 0x1.744p+0 (RZ)
+          {0x363dU, 0x3dd1U, 1U, 0U, 0U},
+          // x = 0x1.95cp-2, exp10m1f16(x) = 0x1.7d8p+0 (RZ)
+          {0x3657U, 0x3df6U, 1U, 0U, 0U},
+          // x = 0x1.d04p-2, exp10m1f16(x) = 0x1.d7p+0 (RZ)
+          {0x3741U, 0x3f5cU, 1U, 0U, 1U},
+#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+          // x = 0x1.0cp+1, exp10m1f16(x) = 0x1.ec4p+6 (RZ)
+          {0x4030U, 0x57b1U, 1U, 0U, 1U},
+          // x = 0x1.1b8p+1, exp10m1f16(x) = 0x1.45cp+7 (RZ)
+          {0x406eU, 0x5917U, 1U, 0U, 1U},
+          // x = 0x1.2f4p+2, exp10m1f16(x) = 0x1.ab8p+15 (RZ)
+          {0x44bdU, 0x7aaeU, 1U, 0U, 1U},
+#endif
+      }};
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  using FPBits = fputil::FPBits<float16>;
+  FPBits x_bits(x);
+
+  uint16_t x_u = x_bits.uintval();
+  uint16_t x_abs = x_u & 0x7fffU;
+
+  // When |x| <= 2^(-3), or |x| >= 11 * log10(2), or x is NaN.
+  if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x429fU)) {
+    // exp10m1(NaN) = NaN
+    if (x_bits.is_nan()) {
+      if (x_bits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // When x >= 16 * log10(2).
+    if (x_u >= 0x44d1U && x_bits.is_pos()) {
+      // exp10m1(+inf) = +inf
+      if (x_bits.is_inf())
+        return FPBits::inf().get_val();
+
+      switch (fputil::quick_get_round()) {
+      case FE_TONEAREST:
+      case FE_UPWARD:
+        fputil::set_errno_if_required(ERANGE);
+        fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
+        return FPBits::inf().get_val();
+      default:
+        return FPBits::max_normal().get_val();
+      }
+    }
+
+    // When x < -11 * log10(2).
+    if (x_u > 0xc29fU) {
+      // exp10m1(-inf) = -1
+      if (x_bits.is_inf())
+        return FPBits::one(Sign::NEG).get_val();
+
+      // When x >= -0x1.ce4p+1, round(10^x - 1, HP, RN) = -0x1.ffcp-1.
+      if (x_u <= 0xc339U) {
+        return fputil::round_result_slightly_down(
+            fputil::cast<float16>(-0x1.ffcp-1));
+      }
+
+      // When x < -0x1.ce4p+1, round(10^x - 1, HP, RN) = -1.
+      switch (fputil::quick_get_round()) {
+      case FE_TONEAREST:
+      case FE_DOWNWARD:
+        return FPBits::one(Sign::NEG).get_val();
+      default:
+        return fputil::cast<float16>(-0x1.ffcp-1);
+      }
+    }
+
+    // When |x| <= 2^(-3).
+    if (x_abs <= 0x3000U) {
+      if (LIBC_UNLIKELY(x_abs == 0))
+        return x;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+      if (auto r = EXP10M1F16_EXCEPTS_LO.lookup(x_u);
+          LIBC_UNLIKELY(r.has_value()))
+        return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+      float xf = x;
+      // Degree-5 minimax polynomial generated by Sollya with the following
+      // commands:
+      //   > display = hexadecimal;
+      //   > P = fpminimax((10^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]);
+      //   > x * P;
+      return fputil::cast<float16>(
+          xf * fputil::polyeval(xf, 0x1.26bb1cp+1f, 0x1.5351c8p+1f,
+                                0x1.04704p+1f, 0x1.2ce084p+0f, 0x1.14a6bep-1f));
+    }
+  }
+
+  // When x is 1, 2, or 3. These are hard-to-round cases with exact results.
+  // 10^4 - 1 = 9'999 is not exactly representable as a float16, but luckily the
+  // polynomial approximation gives the correct result for x = 4 in all
+  // rounding modes.
+  if (LIBC_UNLIKELY((x_u & ~(0x3c00U | 0x4000U | 0x4200U | 0x4400U)) == 0)) {
+    switch (x_u) {
+    case 0x3c00U: // x = 1.0f16
+      return fputil::cast<float16>(9.0);
+    case 0x4000U: // x = 2.0f16
+      return fputil::cast<float16>(99.0);
+    case 0x4200U: // x = 3.0f16
+      return fputil::cast<float16>(999.0);
+    }
+  }
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  if (auto r = EXP10M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  // exp10(x) = exp2((hi + mid) * log2(10)) * exp10(lo)
+  auto [exp2_hi_mid, exp10_lo] = exp10_range_reduction(x);
+  // exp10m1(x) = exp2((hi + mid) * log2(10)) * exp10(lo) - 1
+  return fputil::cast<float16>(
+      fputil::multiply_add(exp2_hi_mid, exp10_lo, -1.0f));
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 8074a3925626c..99c1b08326d53 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1603,18 +1603,7 @@ add_entrypoint_object(
   HDRS
     ../exp10m1f16.h
   DEPENDS
-    libc.hdr.errno_macros
-    libc.hdr.fenv_macros
-    libc.src.__support.FPUtil.cast
-    libc.src.__support.FPUtil.except_value_utils
-    libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.FPUtil.rounding_mode
-    libc.src.__support.macros.optimization
-    libc.src.__support.macros.properties.cpu_features
-    libc.src.__support.math.exp10f16_utils
+    libc.src.__support.math.exp10m1f16
 )
 
 add_entrypoint_object(
diff --git a/libc/src/math/generic/exp10m1f16.cpp b/libc/src/math/generic/exp10m1f16.cpp
index 6c2fdbea418df..8a3c4abf1f10e 100644
--- a/libc/src/math/generic/exp10m1f16.cpp
+++ b/libc/src/math/generic/exp10m1f16.cpp
@@ -7,166 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/exp10m1f16.h"
-#include "hdr/errno_macros.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h"
-#include "src/__support/macros/properties/cpu_features.h"
-#include "src/__support/math/exp10f16_utils.h"
+#include "src/__support/math/exp10m1f16.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-static constexpr fputil::ExceptValues<float16, 3> EXP10M1F16_EXCEPTS_LO = {{
-    // (input, RZ output, RU offset, RD offset, RN offset)
-    // x = 0x1.5c4p-4, exp10m1f16(x) = 0x1.bacp-3 (RZ)
-    {0x2d71U, 0x32ebU, 1U, 0U, 0U},
-    // x = -0x1.5ep-13, exp10m1f16(x) = -0x1.92cp-12 (RZ)
-    {0x8978U, 0x8e4bU, 0U, 1U, 0U},
-    // x = -0x1.e2p-10, exp10m1f16(x) = -0x1.14cp-8 (RZ)
-    {0x9788U, 0x9c53U, 0U, 1U, 0U},
-}};
-
-#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
-static constexpr size_t N_EXP10M1F16_EXCEPTS_HI = 3;
-#else
-static constexpr size_t N_EXP10M1F16_EXCEPTS_HI = 6;
-#endif
-
-static constexpr fputil::ExceptValues<float16, N_EXP10M1F16_EXCEPTS_HI>
-    EXP10M1F16_EXCEPTS_HI = {{
-        // (input, RZ output, RU offset, RD offset, RN offset)
-        // x = 0x1.8f4p-2, exp10m1f16(x) = 0x1.744p+0 (RZ)
-        {0x363dU, 0x3dd1U, 1U, 0U, 0U},
-        // x = 0x1.95cp-2, exp10m1f16(x) = 0x1.7d8p+0 (RZ)
-        {0x3657U, 0x3df6U, 1U, 0U, 0U},
-        // x = 0x1.d04p-2, exp10m1f16(x) = 0x1.d7p+0 (RZ)
-        {0x3741U, 0x3f5cU, 1U, 0U, 1U},
-#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
-        // x = 0x1.0cp+1, exp10m1f16(x) = 0x1.ec4p+6 (RZ)
-        {0x4030U, 0x57b1U, 1U, 0U, 1U},
-        // x = 0x1.1b8p+1, exp10m1f16(x) = 0x1.45cp+7 (RZ)
-        {0x406eU, 0x5917U, 1U, 0U, 1U},
-        // x = 0x1.2f4p+2, exp10m1f16(x) = 0x1.ab8p+15 (RZ)
-        {0x44bdU, 0x7aaeU, 1U, 0U, 1U},
-#endif
-    }};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
 LLVM_LIBC_FUNCTION(float16, exp10m1f16, (float16 x)) {
-  using FPBits = fputil::FPBits<float16>;
-  FPBits x_bits(x);
-
-  uint16_t x_u = x_bits.uintval();
-  uint16_t x_abs = x_u & 0x7fffU;
-
-  // When |x| <= 2^(-3), or |x| >= 11 * log10(2), or x is NaN.
-  if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x429fU)) {
-    // exp10m1(NaN) = NaN
-    if (x_bits.is_nan()) {
-      if (x_bits.is_signaling_nan()) {
-        fputil::raise_except_if_required(FE_INVALID);
-        return FPBits::quiet_nan().get_val();
-      }
-
-      return x;
-    }
-
-    // When x >= 16 * log10(2).
-    if (x_u >= 0x44d1U && x_bits.is_pos()) {
-      // exp10m1(+inf) = +inf
-      if (x_bits.is_inf())
-        return FPBits::inf().get_val();
-
-      switch (fputil::quick_get_round()) {
-      case FE_TONEAREST:
-      case FE_UPWARD:
-        fputil::set_errno_if_required(ERANGE);
-        fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
-        return FPBits::inf().get_val();
-      default:
-        return FPBits::max_normal().get_val();
-      }
-    }
-
-    // When x < -11 * log10(2).
-    if (x_u > 0xc29fU) {
-      // exp10m1(-inf) = -1
-      if (x_bits.is_inf())
-        return FPBits::one(Sign::NEG).get_val();
-
-      // When x >= -0x1.ce4p+1, round(10^x - 1, HP, RN) = -0x1.ffcp-1.
-      if (x_u <= 0xc339U) {
-        return fputil::round_result_slightly_down(
-            fputil::cast<float16>(-0x1.ffcp-1));
-      }
-
-      // When x < -0x1.ce4p+1, round(10^x - 1, HP, RN) = -1.
-      switch (fputil::quick_get_round()) {
-      case FE_TONEAREST:
-      case FE_DOWNWARD:
-        return FPBits::one(Sign::NEG).get_val();
-      default:
-        return fputil::cast<float16>(-0x1.ffcp-1);
-      }
-    }
-
-    // When |x| <= 2^(-3).
-    if (x_abs <= 0x3000U) {
-      if (LIBC_UNLIKELY(x_abs == 0))
-        return x;
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-      if (auto r = EXP10M1F16_EXCEPTS_LO.lookup(x_u);
-          LIBC_UNLIKELY(r.has_value()))
-        return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-      float xf = x;
-      // Degree-5 minimax polynomial generated by Sollya with the following
-      // commands:
-      //   > display = hexadecimal;
-      //   > P = fpminimax((10^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]);
-      //   > x * P;
-      return fputil::cast<float16>(
-          xf * fputil::polyeval(xf, 0x1.26bb1cp+1f, 0x1.5351c8p+1f,
-                                0x1.04704p+1f, 0x1.2ce084p+0f, 0x1.14a6bep-1f));
-    }
-  }
-
-  // When x is 1, 2, or 3. These are hard-to-round cases with exact results.
-  // 10^4 - 1 = 9'999 is not exactly representable as a float16, but luckily the
-  // polynomial approximation gives the correct result for x = 4 in all
-  // rounding modes.
-  if (LIBC_UNLIKELY((x_u & ~(0x3c00U | 0x4000U | 0x4200U | 0x4400U)) == 0)) {
-    switch (x_u) {
-    case 0x3c00U: // x = 1.0f16
-      return fputil::cast<float16>(9.0);
-    case 0x4000U: // x = 2.0f16
-      return fputil::cast<float16>(99.0);
-    case 0x4200U: // x = 3.0f16
-      return fputil::cast<float16>(999.0);
-    }
-  }
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  if (auto r = EXP10M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
-    return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-  // exp10(x) = exp2((hi + mid) * log2(10)) * exp10(lo)
-  auto [exp2_hi_mid, exp10_lo] = exp10_range_reduction(x);
-  // exp10m1(x) = exp2((hi + mid) * log2(lo)) * exp10(lo) - 1
-  return fputil::cast<float16>(
-      fputil::multiply_add(exp2_hi_mid, exp10_lo, -1.0f));
+  return math::exp10m1f16(x);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index 13a0aae5d4c67..ea4634cbe7f9f 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -37,6 +37,7 @@ add_fp_unittest(
     libc.src.__support.math.cospif16
     libc.src.__support.math.dsqrtl
     libc.src.__support.math.exp10m1f
+    libc.src.__support.math.exp10m1f16
     libc.src.__support.math.erff
     libc.src.__support.math.exp
     libc.src.__support.math.exp10
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index 25bf5ad8ae411..17221932927b0 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -27,6 +27,7 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) {
   EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::coshf16(0.0f16));
   EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::cospif16(0.0f16));
   EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp10f16(0.0f16));
+  EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::exp10m1f16(0.0f16));
 
   EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::expf16(0.0f16));
 
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 8d9e80393bf20..e57d9dea036dd 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2673,6 +2673,22 @@ libc_support_library(
     ],
 )
 
+libc_support_library(
+    name = "__support_math_exp10m1f16",
+    hdrs = ["src/__support/math/exp10m1f16.h"],
+    deps = [
+        ":__support_fputil_except_value_utils",
+        ":__support_fputil_fenv_impl",
+        ":__support_fputil_fp_bits",
+        ":__support_fputil_multiply_add",
+        ":__support_fputil_polyeval",
+        ":__support_fputil_rounding_mode",
+        ":__support_macros_optimization",
+        ":__support_math_exp10f16_utils",
+        ":errno",
+    ],
+)
+
 libc_support_library(
     name = "__support_math_erff",
     hdrs = ["src/__support/math/erff.h"],
@@ -3622,7 +3638,7 @@ libc_math_function(
 libc_math_function(
     name = "exp10m1f16",
     additional_deps = [
-        ":__support_math_exp10f16_utils",
+        ":__support_math_exp10m1f16",
     ],
 )
 

>From 2a17cc3d1fabad9cc8e575499c5efe300df3200e Mon Sep 17 00:00:00 2001
From: Henrich Lauko <xlauko at mail.muni.cz>
Date: Wed, 1 Oct 2025 10:44:05 +0200
Subject: [PATCH 26/48] [CIR] Refactor cir.cast to use uniform assembly form
 w/o parens, commas (#161431)

This mirrors incubator changes from https://github.com/llvm/clangir/pull/1922
---
 clang/include/clang/CIR/Dialect/IR/CIROps.td  | 28 +++----
 clang/lib/CIR/CodeGen/CIRGenRecordLayout.h    |  2 +-
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp |  2 +-
 .../CIR/CodeGen/aapcs-volatile-bitfields.c    | 10 +--
 clang/test/CIR/CodeGen/array-ctor.cpp         |  6 +-
 clang/test/CIR/CodeGen/array-dtor.cpp         |  8 +-
 clang/test/CIR/CodeGen/array.cpp              | 32 ++++----
 clang/test/CIR/CodeGen/assign-operator.cpp    |  6 +-
 clang/test/CIR/CodeGen/basic.c                |  2 +-
 clang/test/CIR/CodeGen/basic.cpp              |  2 +-
 clang/test/CIR/CodeGen/binassign.c            |  2 +-
 clang/test/CIR/CodeGen/binop.c                |  6 +-
 clang/test/CIR/CodeGen/binop.cpp              | 12 +--
 clang/test/CIR/CodeGen/builtin_bit.cpp        | 38 ++++-----
 clang/test/CIR/CodeGen/builtin_call.cpp       |  8 +-
 clang/test/CIR/CodeGen/builtin_printf.cpp     |  4 +-
 clang/test/CIR/CodeGen/cast.cpp               | 30 +++----
 clang/test/CIR/CodeGen/cmp.cpp                |  4 +-
 clang/test/CIR/CodeGen/comma.c                |  2 +-
 clang/test/CIR/CodeGen/complex-cast.cpp       | 80 +++++++++----------
 .../CodeGen/complex-compound-assignment.cpp   | 28 +++----
 clang/test/CIR/CodeGen/complex-mul-div.cpp    | 24 +++---
 clang/test/CIR/CodeGen/complex-unary.cpp      | 24 +++---
 clang/test/CIR/CodeGen/complex.cpp            | 38 ++++-----
 clang/test/CIR/CodeGen/cxx-default-init.cpp   |  6 +-
 clang/test/CIR/CodeGen/delegating-ctor.cpp    | 12 +--
 clang/test/CIR/CodeGen/delete.cpp             |  4 +-
 clang/test/CIR/CodeGen/destructors.cpp        |  4 +-
 .../CIR/CodeGen/finegrain-bitfield-access.cpp | 16 ++--
 clang/test/CIR/CodeGen/if.cpp                 |  6 +-
 clang/test/CIR/CodeGen/int-to-bool.cpp        |  8 +-
 clang/test/CIR/CodeGen/loop.cpp               | 12 +--
 clang/test/CIR/CodeGen/new.cpp                | 16 ++--
 clang/test/CIR/CodeGen/no-prototype.c         |  6 +-
 clang/test/CIR/CodeGen/opaque.c               |  4 +-
 clang/test/CIR/CodeGen/opaque.cpp             |  6 +-
 clang/test/CIR/CodeGen/pointers.cpp           |  2 +-
 clang/test/CIR/CodeGen/ternary.cpp            |  2 +-
 clang/test/CIR/CodeGen/unary.cpp              | 32 ++++----
 clang/test/CIR/CodeGen/union.c                | 10 +--
 clang/test/CIR/CodeGen/var_arg.c              | 12 +--
 .../CIR/CodeGen/variable-decomposition.cpp    |  2 +-
 clang/test/CIR/CodeGen/vbase.cpp              | 10 +--
 clang/test/CIR/CodeGen/vector-ext.cpp         |  8 +-
 clang/test/CIR/CodeGen/vector.cpp             |  8 +-
 clang/test/CIR/CodeGen/vtt.cpp                | 38 ++++-----
 clang/test/CIR/CodeGenOpenACC/combined-copy.c |  2 +-
 .../combined-firstprivate-clause.cpp          | 74 ++++++++---------
 .../combined-private-clause.cpp               |  2 +-
 .../combined-reduction-clause-default-ops.cpp | 18 ++---
 .../combined-reduction-clause-float.cpp       | 18 ++---
 .../combined-reduction-clause-inline-ops.cpp  | 36 ++++-----
 .../combined-reduction-clause-int.cpp         | 18 ++---
 .../combined-reduction-clause-outline-ops.cpp | 36 ++++-----
 clang/test/CIR/CodeGenOpenACC/combined.cpp    |  6 +-
 .../compute-firstprivate-clause.c             | 36 ++++-----
 .../compute-firstprivate-clause.cpp           | 74 ++++++++---------
 .../CodeGenOpenACC/compute-private-clause.cpp |  2 +-
 .../compute-reduction-clause-default-ops.c    | 18 ++---
 .../compute-reduction-clause-default-ops.cpp  | 18 ++---
 .../compute-reduction-clause-float.c          | 18 ++---
 .../compute-reduction-clause-float.cpp        | 18 ++---
 .../compute-reduction-clause-inline-ops.cpp   | 36 ++++-----
 .../compute-reduction-clause-int.c            | 18 ++---
 .../compute-reduction-clause-int.cpp          | 18 ++---
 .../compute-reduction-clause-outline-ops.cpp  | 36 ++++-----
 .../compute-reduction-clause-unsigned-int.c   | 18 ++---
 clang/test/CIR/CodeGenOpenACC/data.c          |  4 +-
 clang/test/CIR/CodeGenOpenACC/host_data.c     |  4 +-
 clang/test/CIR/CodeGenOpenACC/init.c          |  6 +-
 clang/test/CIR/CodeGenOpenACC/kernels.c       | 10 +--
 .../CodeGenOpenACC/loop-private-clause.cpp    |  2 +-
 .../loop-reduction-clause-default-ops.cpp     | 18 ++---
 .../loop-reduction-clause-float.cpp           | 18 ++---
 .../loop-reduction-clause-inline-ops.cpp      | 36 ++++-----
 .../loop-reduction-clause-int.cpp             | 18 ++---
 .../loop-reduction-clause-outline-ops.cpp     | 36 ++++-----
 clang/test/CIR/CodeGenOpenACC/parallel.c      | 10 +--
 .../private-clause-array-recipes-CtorDtor.cpp | 38 ++++-----
 .../private-clause-array-recipes-NoOps.cpp    | 10 +--
 ...-clause-pointer-array-recipes-CtorDtor.cpp | 12 +--
 clang/test/CIR/CodeGenOpenACC/serial.c        | 10 +--
 clang/test/CIR/CodeGenOpenACC/set.c           |  4 +-
 clang/test/CIR/CodeGenOpenACC/shutdown.c      |  6 +-
 clang/test/CIR/CodeGenOpenACC/wait.c          |  6 +-
 clang/test/CIR/IR/alloca.cir                  |  4 +-
 clang/test/CIR/IR/binassign.cir               |  4 +-
 clang/test/CIR/IR/cast.cir                    |  8 +-
 clang/test/CIR/IR/cmp.cir                     | 48 +++++------
 clang/test/CIR/IR/vtable-addrpt.cir           |  2 +-
 clang/test/CIR/IR/vtt-addrpoint.cir           |  2 +-
 clang/test/CIR/Lowering/cast.cir              | 34 ++++----
 clang/test/CIR/Lowering/if.cir                |  8 +-
 clang/test/CIR/Lowering/vtt-addrpoint.cir     |  2 +-
 clang/test/CIR/Transforms/canonicalize.cir    | 28 +++----
 clang/test/CIR/Transforms/if.cir              |  8 +-
 clang/test/CIR/Transforms/switch.cir          |  8 +-
 97 files changed, 773 insertions(+), 773 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index e1be08c1bbbbd..f857cf82a5192 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -209,9 +209,10 @@ def CIR_CastOp : CIR_Op<"cast", [
     Example:
 
     ```mlir
-    %4 = cir.cast(int_to_bool, %3 : i32), !cir.bool
+    %4 = cir.cast int_to_bool %3 : i32 -> !cir.bool
     ...
-    %x = cir.cast(array_to_ptrdecay, %0 : !cir.ptr<!cir.array<i32 x 10>>), !cir.ptr<i32>
+    %x = cir.cast array_to_ptrdecay %0
+       : !cir.ptr<!cir.array<i32 x 10>> -> !cir.ptr<i32>
     ```
   }];
 
@@ -219,8 +220,7 @@ def CIR_CastOp : CIR_Op<"cast", [
   let results = (outs CIR_AnyType:$result);
 
   let assemblyFormat = [{
-    `(` $kind `,` $src `:` type($src) `)`
-    `,` type($result) attr-dict
+    $kind $src `:` type($src) `->` type($result) attr-dict
   }];
 
   // The input and output types should match the cast kind.
@@ -1176,7 +1176,7 @@ def CIR_GotoOp : CIR_Op<"goto", [Terminator]> {
   ```mlir
     cir.scope {  // REGION #1
       %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
-      %3 = cir.cast(int_to_bool, %2 : !s32i), !cir.bool
+      %3 = cir.cast int_to_bool %2 : !s32i -> !cir.bool
       cir.if %3 {
         cir.goto "label"
       }
@@ -3994,9 +3994,9 @@ def CIR_VAStartOp : CIR_Op<"va_start"> {
 
     ```mlir
     // %args : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>
-    %p = cir.cast(array_to_ptrdecay, %args
-          : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>),
-        !cir.ptr<!rec___va_list_tag>
+    %p = cir.cast array_to_ptrdecay %args
+          : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>
+          -> !cir.ptr<!rec___va_list_tag>
     %count = cir.load %0 : !cir.ptr<!s32i>, !s32i
     cir.va_start %p %count : !cir.ptr<!rec___va_list_tag>, !s32i
     ```
@@ -4033,9 +4033,9 @@ def CIR_VAEndOp : CIR_Op<"va_end"> {
     Example:
     ```mlir
     // %args : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>
-    %p = cir.cast(array_to_ptrdecay, %args
-          : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>),
-        !cir.ptr<!rec___va_list_tag>
+    %p = cir.cast array_to_ptrdecay %args
+          : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>
+          -> !cir.ptr<!rec___va_list_tag>
     cir.va_end %p : !cir.ptr<!rec___va_list_tag>
     ```
   }];
@@ -4068,9 +4068,9 @@ def CIR_VAArgOp : CIR_Op<"va_arg"> {
     Example:
     ```mlir
     // %args : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>
-    %p = cir.cast(array_to_ptrdecay, %args
-            : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>),
-          !cir.ptr<!rec___va_list_tag>
+    %p = cir.cast array_to_ptrdecay %args
+            : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>
+            -> !cir.ptr<!rec___va_list_tag>
     cir.va.start %p : !cir.ptr<!rec___va_list_tag>
 
     // Fetch an `int` from the vararg list.
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
index 914ef16c2a5ee..bf0ddc5875059 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
@@ -57,7 +57,7 @@ namespace clang::CIRGen {
 ///   cir.func @store_field() {
 ///     %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s"] {alignment = 4 : i64}
 ///     %1 = cir.const #cir.int<2> : !s32i
-///     %2 = cir.cast(integral, %1 : !s32i), !u32i
+///     %2 = cir.cast integral %1 : !s32i -> !u32i
 ///     %3 = cir.get_member %0[3] {name = "more_bits"} : !cir.ptr<!rec_S> ->
 ///     !cir.ptr<!u16i>
 ///     %4 = cir.set_bitfield(#bfi_more_bits, %3 :
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 0f309e42bcd4c..22f069d9cead0 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -2425,7 +2425,7 @@ static void prepareTypeConverter(mlir::LLVMTypeConverter &converter,
 // For instance, this CIR code:
 //
 //    cir.func @foo(%arg0: !s32i) -> !s32i {
-//      %4 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool
+//      %4 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
 //      cir.if %4 {
 //        %5 = cir.const #cir.int<1> : !s32i
 //        cir.return %5 : !s32i
diff --git a/clang/test/CIR/CodeGen/aapcs-volatile-bitfields.c b/clang/test/CIR/CodeGen/aapcs-volatile-bitfields.c
index 00378f725d76a..92eae6aab6800 100644
--- a/clang/test/CIR/CodeGen/aapcs-volatile-bitfields.c
+++ b/clang/test/CIR/CodeGen/aapcs-volatile-bitfields.c
@@ -86,7 +86,7 @@ int check_load(st1 *s1) {
 // CIR:    [[LOAD:%.*]] = cir.load align(8) {{.*}} : !cir.ptr<!cir.ptr<!rec_st1>>, !cir.ptr<!rec_st1>
 // CIR:    [[MEMBER:%.*]] = cir.get_member [[LOAD]][0] {name = "b"} : !cir.ptr<!rec_st1> -> !cir.ptr<!u16i>
 // CIR:    [[BITFI:%.*]] = cir.get_bitfield align(4) (#bfi_b, [[MEMBER]] {is_volatile} : !cir.ptr<!u16i>) -> !u32i
-// CIR:    [[CAST:%.*]] = cir.cast(integral, [[BITFI]] : !u32i), !s32i
+// CIR:    [[CAST:%.*]] = cir.cast integral [[BITFI]] : !u32i -> !s32i
 // CIR:    cir.store [[CAST]], [[RETVAL:%.*]] : !s32i, !cir.ptr<!s32i>
 // CIR:    [[RET:%.*]] = cir.load [[RETVAL]] : !cir.ptr<!s32i>, !s32i
 // CIR:    cir.return [[RET]] : !s32i
@@ -118,7 +118,7 @@ int check_load_exception(st3 *s3) {
 // CIR:    [[LOAD:%.*]] = cir.load align(8) {{.*}} : !cir.ptr<!cir.ptr<!rec_st3>>, !cir.ptr<!rec_st3>
 // CIR:    [[MEMBER:%.*]] = cir.get_member [[LOAD]][2] {name = "b"} : !cir.ptr<!rec_st3> -> !cir.ptr<!u8i>
 // CIR:    [[BITFI:%.*]] = cir.get_bitfield align(4) (#bfi_b1, [[MEMBER]] {is_volatile} : !cir.ptr<!u8i>) -> !u32i
-// CIR:    [[CAST:%.*]] = cir.cast(integral, [[BITFI]] : !u32i), !s32i
+// CIR:    [[CAST:%.*]] = cir.cast integral [[BITFI]] : !u32i -> !s32i
 // CIR:    cir.store [[CAST]], [[RETVAL:%.*]] : !s32i, !cir.ptr<!s32i>
 // CIR:    [[RET:%.*]] = cir.load [[RETVAL]] : !cir.ptr<!s32i>, !s32i
 // CIR:    cir.return [[RET]] : !s32i
@@ -180,7 +180,7 @@ void check_store(st2 *s2) {
 
 // CIR:  cir.func dso_local @check_store
 // CIR:    [[CONST:%.*]] = cir.const #cir.int<1> : !s32i
-// CIR:    [[CAST:%.*]] = cir.cast(integral, [[CONST]] : !s32i), !s16i
+// CIR:    [[CAST:%.*]] = cir.cast integral [[CONST]] : !s32i -> !s16i
 // CIR:    [[LOAD:%.*]] = cir.load align(8) {{.*}} : !cir.ptr<!cir.ptr<!rec_st2>>, !cir.ptr<!rec_st2>
 // CIR:    [[MEMBER:%.*]] = cir.get_member [[LOAD]][0] {name = "a"} : !cir.ptr<!rec_st2> -> !cir.ptr<!u32i>
 // CIR:    [[SETBF:%.*]] = cir.set_bitfield align(8) (#bfi_a, [[MEMBER]] : !cir.ptr<!u32i>, [[CAST]] : !s16i) {is_volatile} -> !s16i
@@ -211,7 +211,7 @@ void check_store_exception(st3 *s3) {
 
 // CIR:  cir.func dso_local @check_store_exception
 // CIR:    [[CONST:%.*]] = cir.const #cir.int<2> : !s32i
-// CIR:    [[CAST:%.*]] = cir.cast(integral, [[CONST]] : !s32i), !u32i
+// CIR:    [[CAST:%.*]] = cir.cast integral [[CONST]] : !s32i -> !u32i
 // CIR:    [[LOAD:%.*]] = cir.load align(8) {{.*}} : !cir.ptr<!cir.ptr<!rec_st3>>, !cir.ptr<!rec_st3>
 // CIR:    [[MEMBER:%.*]] = cir.get_member [[LOAD]][2] {name = "b"} : !cir.ptr<!rec_st3> -> !cir.ptr<!u8i>
 // CIR:    [[SETBF:%.*]] = cir.set_bitfield align(4) (#bfi_b1, [[MEMBER]] : !cir.ptr<!u8i>, [[CAST]] : !u32i) {is_volatile} -> !u32i
@@ -263,7 +263,7 @@ void check_store_second_member (st4 *s4) {
 
 // CIR:  cir.func dso_local @check_store_second_member
 // CIR:    [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
-// CIR:    [[CAST:%.*]] = cir.cast(integral, [[ONE]] : !s32i), !u64i
+// CIR:    [[CAST:%.*]] = cir.cast integral [[ONE]] : !s32i -> !u64i
 // CIR:    [[LOAD:%.*]] = cir.load align(8) {{.*}} : !cir.ptr<!cir.ptr<!rec_st4>>, !cir.ptr<!rec_st4>
 // CIR:    [[MEMBER:%.*]] = cir.get_member [[LOAD]][2] {name = "b"} : !cir.ptr<!rec_st4> -> !cir.ptr<!u16i>
 // CIR:    cir.set_bitfield align(8) (#bfi_b2, [[MEMBER]] : !cir.ptr<!u16i>, [[CAST]] : !u64i) {is_volatile} -> !u64i
diff --git a/clang/test/CIR/CodeGen/array-ctor.cpp b/clang/test/CIR/CodeGen/array-ctor.cpp
index bad4868ed8c34..5583d9d56954e 100644
--- a/clang/test/CIR/CodeGen/array-ctor.cpp
+++ b/clang/test/CIR/CodeGen/array-ctor.cpp
@@ -27,7 +27,7 @@ void foo() {
 // CIR: cir.func dso_local @_Z3foov()
 // CIR:   %[[ARRAY:.*]] = cir.alloca !cir.array<!rec_S x 42>, !cir.ptr<!cir.array<!rec_S x 42>>, ["s", init]
 // CIR:   %[[CONST42:.*]] = cir.const #cir.int<42> : !u64i
-// CIR:   %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARRAY]] : !cir.ptr<!cir.array<!rec_S x 42>>), !cir.ptr<!rec_S>
+// CIR:   %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARRAY]] : !cir.ptr<!cir.array<!rec_S x 42>> -> !cir.ptr<!rec_S>
 // CIR:   %[[END_PTR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_S>, %[[CONST42]] : !u64i), !cir.ptr<!rec_S>
 // CIR:   %[[ITER:.*]] = cir.alloca !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>, ["__array_idx"]
 // CIR:   cir.store %[[DECAY]], %[[ITER]] : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
@@ -111,7 +111,7 @@ void multi_dimensional() {
 
 // CIR-BEFORE-LPP:     cir.func{{.*}} @_Z17multi_dimensionalv()
 // CIR-BEFORE-LPP:       %[[S:.*]] = cir.alloca !cir.array<!cir.array<!rec_S x 5> x 3>, !cir.ptr<!cir.array<!cir.array<!rec_S x 5> x 3>>, ["s", init]
-// CIR-BEFORE-LPP:       %[[FLAT:.*]] = cir.cast(bitcast, %[[S]] : !cir.ptr<!cir.array<!cir.array<!rec_S x 5> x 3>>), !cir.ptr<!cir.array<!rec_S x 15>>
+// CIR-BEFORE-LPP:       %[[FLAT:.*]] = cir.cast bitcast %[[S]] : !cir.ptr<!cir.array<!cir.array<!rec_S x 5> x 3>> -> !cir.ptr<!cir.array<!rec_S x 15>>
 // CIR-BEFORE-LPP:       cir.array.ctor %[[FLAT]] : !cir.ptr<!cir.array<!rec_S x 15>> {
 // CIR-BEFORE-LPP:        ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_S>):
 // CIR-BEFORE-LPP:          cir.call @_ZN1SC1Ev(%[[ARG]]) : (!cir.ptr<!rec_S>) -> ()
@@ -122,7 +122,7 @@ void multi_dimensional() {
 // CIR:     cir.func{{.*}} @_Z17multi_dimensionalv()
 // CIR:       %[[S:.*]] = cir.alloca !cir.array<!cir.array<!rec_S x 5> x 3>, !cir.ptr<!cir.array<!cir.array<!rec_S x 5> x 3>>, ["s", init]
 // CIR:       %[[CONST15:.*]] = cir.const #cir.int<15> : !u64i
-// CIR:       %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, {{.*}} : !cir.ptr<!cir.array<!rec_S x 15>>), !cir.ptr<!rec_S>
+// CIR:       %[[DECAY:.*]] = cir.cast array_to_ptrdecay {{.*}} : !cir.ptr<!cir.array<!rec_S x 15>> -> !cir.ptr<!rec_S>
 // CIR:       %[[END_PTR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_S>, %[[CONST15]] : !u64i), !cir.ptr<!rec_S>
 // CIR:       %[[ITER:.*]] = cir.alloca !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>, ["__array_idx"]
 // CIR:       cir.store %[[DECAY]], %[[ITER]] : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
diff --git a/clang/test/CIR/CodeGen/array-dtor.cpp b/clang/test/CIR/CodeGen/array-dtor.cpp
index 36db265a6dfed..e969d50842a03 100644
--- a/clang/test/CIR/CodeGen/array-dtor.cpp
+++ b/clang/test/CIR/CodeGen/array-dtor.cpp
@@ -26,7 +26,7 @@ void test_cleanup_array() {
 // CIR: cir.func{{.*}} @_Z18test_cleanup_arrayv()
 // CIR:   %[[S:.*]] = cir.alloca !cir.array<!rec_S x 42>, !cir.ptr<!cir.array<!rec_S x 42>>, ["s"]
 // CIR:   %[[CONST41:.*]] = cir.const #cir.int<41> : !u64i
-// CIR:   %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[S]] : !cir.ptr<!cir.array<!rec_S x 42>>), !cir.ptr<!rec_S>
+// CIR:   %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[S]] : !cir.ptr<!cir.array<!rec_S x 42>> -> !cir.ptr<!rec_S>
 // CIR:   %[[END_PTR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_S>, %[[CONST41]] : !u64i), !cir.ptr<!rec_S>
 // CIR:   %[[ITER:.*]] = cir.alloca !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>, ["__array_idx"]
 // CIR:   cir.store %[[END_PTR]], %[[ITER]] : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
@@ -109,7 +109,7 @@ void multi_dimensional() {
 
 // CIR-BEFORE-LPP:     cir.func{{.*}} @_Z17multi_dimensionalv()
 // CIR-BEFORE-LPP:       %[[S:.*]] = cir.alloca !cir.array<!cir.array<!rec_S x 5> x 3>, !cir.ptr<!cir.array<!cir.array<!rec_S x 5> x 3>>, ["s"]
-// CIR-BEFORE-LPP:       %[[FLAT:.*]] = cir.cast(bitcast, %[[S]] : !cir.ptr<!cir.array<!cir.array<!rec_S x 5> x 3>>), !cir.ptr<!cir.array<!rec_S x 15>>
+// CIR-BEFORE-LPP:       %[[FLAT:.*]] = cir.cast bitcast %[[S]] : !cir.ptr<!cir.array<!cir.array<!rec_S x 5> x 3>> -> !cir.ptr<!cir.array<!rec_S x 15>>
 // CIR-BEFORE-LPP:       cir.array.dtor %[[FLAT]] : !cir.ptr<!cir.array<!rec_S x 15>> {
 // CIR-BEFORE-LPP:       ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_S>):
 // CIR-BEFORE-LPP:         cir.call @_ZN1SD1Ev(%[[ARG]]) nothrow : (!cir.ptr<!rec_S>) -> ()
@@ -119,9 +119,9 @@ void multi_dimensional() {
 
 // CIR:     cir.func{{.*}} @_Z17multi_dimensionalv()
 // CIR:       %[[S:.*]] = cir.alloca !cir.array<!cir.array<!rec_S x 5> x 3>, !cir.ptr<!cir.array<!cir.array<!rec_S x 5> x 3>>, ["s"]
-// CIR:       %[[FLAT:.*]] = cir.cast(bitcast, %[[S]] : !cir.ptr<!cir.array<!cir.array<!rec_S x 5> x 3>>), !cir.ptr<!cir.array<!rec_S x 15>>
+// CIR:       %[[FLAT:.*]] = cir.cast bitcast %[[S]] : !cir.ptr<!cir.array<!cir.array<!rec_S x 5> x 3>> -> !cir.ptr<!cir.array<!rec_S x 15>>
 // CIR:       %[[CONST14:.*]] = cir.const #cir.int<14> : !u64i
-// CIR:       %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[FLAT]] : !cir.ptr<!cir.array<!rec_S x 15>>), !cir.ptr<!rec_S>
+// CIR:       %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[FLAT]] : !cir.ptr<!cir.array<!rec_S x 15>> -> !cir.ptr<!rec_S>
 // CIR:       %[[END_PTR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_S>, %[[CONST14]] : !u64i), !cir.ptr<!rec_S>
 // CIR:       %[[ITER:.*]] = cir.alloca !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>, ["__array_idx"]
 // CIR:       cir.store %[[END_PTR]], %[[ITER]] : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
diff --git a/clang/test/CIR/CodeGen/array.cpp b/clang/test/CIR/CodeGen/array.cpp
index 5dac10491d036..3333634a256dc 100644
--- a/clang/test/CIR/CodeGen/array.cpp
+++ b/clang/test/CIR/CodeGen/array.cpp
@@ -113,12 +113,12 @@ void func() {
 // CIR: %[[INIT:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["e", init]
 // CIR: %[[INIT_2:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["e2", init]
 // CIR: %[[IDX:.*]] = cir.const #cir.int<0> : !s32i
-// CIR: %[[ARR_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!s32i x 10>>), !cir.ptr<!s32i>
+// CIR: %[[ARR_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!s32i x 10>> -> !cir.ptr<!s32i>
 // CIR: %[[ELE_PTR:.*]] = cir.ptr_stride(%[[ARR_PTR]] : !cir.ptr<!s32i>, %[[IDX]] : !s32i), !cir.ptr<!s32i>
 // CIR: %[[TMP:.*]] = cir.load{{.*}} %[[ELE_PTR]] : !cir.ptr<!s32i>, !s32i
 // CIR" cir.store %[[TMP]], %[[INIT]] : !s32i, !cir.ptr<!s32i>
 // CIR: %[[IDX:.*]] = cir.const #cir.int<1> : !s32i
-// CIR: %[[ARR_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!s32i x 10>>), !cir.ptr<!s32i>
+// CIR: %[[ARR_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!s32i x 10>> -> !cir.ptr<!s32i>
 // CIR: %[[ELE_PTR:.*]] = cir.ptr_stride(%[[ARR_PTR]] : !cir.ptr<!s32i>, %[[IDX]] : !s32i), !cir.ptr<!s32i>
 // CIR: %[[TMP:.*]] = cir.load{{.*}} %[[ELE_PTR]] : !cir.ptr<!s32i>, !s32i
 // CIR" cir.store %[[TMP]], %[[INIT_2]] : !s32i, !cir.ptr<!s32i>
@@ -152,7 +152,7 @@ void func2() {
 
 // CIR: %[[ARR2:.*]] = cir.alloca !cir.array<!s32i x 2>, !cir.ptr<!cir.array<!s32i x 2>>, ["arr", init]
 // CIR: %[[ARR_PTR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp", init]
-// CIR: %[[ARR_0:.*]] = cir.cast(array_to_ptrdecay, %[[ARR2]] : !cir.ptr<!cir.array<!s32i x 2>>), !cir.ptr<!s32i>
+// CIR: %[[ARR_0:.*]] = cir.cast array_to_ptrdecay %[[ARR2]] : !cir.ptr<!cir.array<!s32i x 2>> -> !cir.ptr<!s32i>
 // CIR: %[[FIVE:.*]] = cir.const #cir.int<5> : !s32i
 // CIR: cir.store{{.*}} %[[FIVE]], %[[ARR_0]] : !s32i, !cir.ptr<!s32i>
 // CIR: %[[OFFSET_0:.*]] = cir.const #cir.int<1> : !s64i
@@ -209,7 +209,7 @@ void func3() {
 // CIR: %[[ARR:.*]] = cir.alloca !cir.array<!s32i x 2>, !cir.ptr<!cir.array<!s32i x 2>>, ["arr", init]
 // CIR: %[[IDX:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["idx", init]
 // CIR: %[[INIT:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["e", init]
-// CIR: %[[ARR_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!s32i x 2>>), !cir.ptr<!s32i>
+// CIR: %[[ARR_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!s32i x 2>> -> !cir.ptr<!s32i>
 // CIR: %[[V0:.*]] = cir.const #cir.int<5> : !s32i
 // CIR: cir.store{{.*}} %[[V0]], %[[ARR_PTR]] : !s32i, !cir.ptr<!s32i>
 // CIR: %[[OFFSET_0:.*]] = cir.const #cir.int<1> : !s64i
@@ -219,7 +219,7 @@ void func3() {
 // CIR: %[[IDX_V:.*]] = cir.const #cir.int<1> : !s32i
 // CIR: cir.store{{.*}} %[[IDX_V]], %[[IDX]] : !s32i, !cir.ptr<!s32i>
 // CIR: %[[TMP_IDX:.*]] = cir.load{{.*}} %[[IDX]] : !cir.ptr<!s32i>, !s32i
-// CIR: %[[ARR_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!s32i x 2>>), !cir.ptr<!s32i>
+// CIR: %[[ARR_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!s32i x 2>> -> !cir.ptr<!s32i>
 // CIR: %[[ELE_PTR:.*]] = cir.ptr_stride(%[[ARR_PTR]] : !cir.ptr<!s32i>, %[[TMP_IDX]] : !s32i), !cir.ptr<!s32i>
 // CIR: %[[ELE_TMP:.*]] = cir.load{{.*}} %[[ELE_PTR]] : !cir.ptr<!s32i>, !s32i
 // CIR: cir.store{{.*}} %[[ELE_TMP]], %[[INIT]] : !s32i, !cir.ptr<!s32i>
@@ -258,20 +258,20 @@ void func4() {
 
 // CIR: %[[ARR:.*]] = cir.alloca !cir.array<!cir.array<!s32i x 1> x 2>, !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>>, ["arr", init]
 // CIR: %[[INIT:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["e", init]
-// CIR: %[[ARR_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>>), !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: %[[ARR_0_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR_PTR]] : !cir.ptr<!cir.array<!s32i x 1>>), !cir.ptr<!s32i>
+// CIR: %[[ARR_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>> -> !cir.ptr<!cir.array<!s32i x 1>>
+// CIR: %[[ARR_0_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR_PTR]] : !cir.ptr<!cir.array<!s32i x 1>> -> !cir.ptr<!s32i>
 // CIR: %[[V_0_0:.*]] = cir.const #cir.int<5> : !s32i
 // CIR: cir.store{{.*}} %[[V_0_0]], %[[ARR_0_PTR]] : !s32i, !cir.ptr<!s32i>
 // CIR: %[[OFFSET:.*]] = cir.const #cir.int<1> : !s64i
 // CIR: %[[ARR_1:.*]] = cir.ptr_stride(%[[ARR_PTR]] : !cir.ptr<!cir.array<!s32i x 1>>, %[[OFFSET]] : !s64i), !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: %[[ARR_1_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR_1]] : !cir.ptr<!cir.array<!s32i x 1>>), !cir.ptr<!s32i>
+// CIR: %[[ARR_1_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR_1]] : !cir.ptr<!cir.array<!s32i x 1>> -> !cir.ptr<!s32i>
 // CIR: %[[V_1_0:.*]] = cir.const #cir.int<6> : !s32i
 // CIR: cir.store{{.*}} %[[V_1_0]], %[[ARR_1_PTR]] : !s32i, !cir.ptr<!s32i>
 // CIR: %[[IDX:.*]] = cir.const #cir.int<0> : !s32i
 // CIR: %[[IDX_1:.*]] = cir.const #cir.int<1> : !s32i
-// CIR: %[[ARR_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>>), !cir.ptr<!cir.array<!s32i x 1>>
+// CIR: %[[ARR_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>> -> !cir.ptr<!cir.array<!s32i x 1>>
 // CIR: %[[ARR_1:.*]] = cir.ptr_stride(%[[ARR_PTR]] : !cir.ptr<!cir.array<!s32i x 1>>, %[[IDX_1]] : !s32i), !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: %[[ARR_1_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR_1]] : !cir.ptr<!cir.array<!s32i x 1>>), !cir.ptr<!s32i>
+// CIR: %[[ARR_1_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR_1]] : !cir.ptr<!cir.array<!s32i x 1>> -> !cir.ptr<!s32i>
 // CIR: %[[ELE_0:.*]] = cir.ptr_stride(%[[ARR_1_PTR]] : !cir.ptr<!s32i>, %[[IDX]] : !s32i), !cir.ptr<!s32i>
 // CIR: %[[TMP:.*]] = cir.load{{.*}} %[[ELE_0]] : !cir.ptr<!s32i>, !s32i
 // CIR: cir.store{{.*}} %[[TMP]], %[[INIT]] : !s32i, !cir.ptr<!s32i>
@@ -306,8 +306,8 @@ void func5() {
 
 // CIR: %[[ARR:.*]] = cir.alloca !cir.array<!cir.array<!s32i x 1> x 2>, !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>>, ["arr", init]
 // CIR: %[[ARR_PTR:.*]] = cir.alloca !cir.ptr<!cir.array<!s32i x 1>>, !cir.ptr<!cir.ptr<!cir.array<!s32i x 1>>>, ["arrayinit.temp", init]
-// CIR: %[[ARR_0:.*]] = cir.cast(array_to_ptrdecay, %0 : !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>>), !cir.ptr<!cir.array<!s32i x 1>>
-// CIR: %[[ARR_0_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR_0]] : !cir.ptr<!cir.array<!s32i x 1>>), !cir.ptr<!s32i>
+// CIR: %[[ARR_0:.*]] = cir.cast array_to_ptrdecay %0 : !cir.ptr<!cir.array<!cir.array<!s32i x 1> x 2>> -> !cir.ptr<!cir.array<!s32i x 1>>
+// CIR: %[[ARR_0_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR_0]] : !cir.ptr<!cir.array<!s32i x 1>> -> !cir.ptr<!s32i>
 // CIR: %[[V_0_0:.*]] = cir.const #cir.int<5> : !s32i
 // CIR: cir.store{{.*}} %[[V_0_0]], %[[ARR_0_PTR]] : !s32i, !cir.ptr<!s32i>
 // CIR: %[[OFFSET:.*]] = cir.const #cir.int<1> : !s64i
@@ -364,7 +364,7 @@ void func6() {
 // CIR: %[[ARR:.*]] = cir.alloca !cir.array<!s32i x 2>, !cir.ptr<!cir.array<!s32i x 2>>, ["arr", init]
 // CIR: %[[V:.*]] = cir.const #cir.int<4> : !s32i
 // CIR: cir.store{{.*}} %[[V]], %[[VAR]] : !s32i, !cir.ptr<!s32i>
-// CIR: %[[ARR_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!s32i x 2>>), !cir.ptr<!s32i>
+// CIR: %[[ARR_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!s32i x 2>> -> !cir.ptr<!s32i>
 // CIR: %[[TMP:.*]] = cir.load{{.*}} %[[VAR]] : !cir.ptr<!s32i>, !s32i
 // CIR: cir.store{{.*}} %[[TMP]], %[[ARR_PTR]] : !s32i, !cir.ptr<!s32i>
 // CIR: %[[OFFSET:.*]] = cir.const #cir.int<1> : !s64i
@@ -396,7 +396,7 @@ void func7() {
 
 // CIR: %[[ARR:.*]] = cir.alloca !cir.array<!cir.ptr<!s32i> x 1>, !cir.ptr<!cir.array<!cir.ptr<!s32i> x 1>>, ["arr", init]
 // CIR: %[[ARR_PTR:.*]] = cir.alloca !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!cir.ptr<!s32i>>>, ["arrayinit.temp", init]
-// CIR: %[[ARR_0:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!cir.ptr<!s32i> x 1>>), !cir.ptr<!cir.ptr<!s32i>>
+// CIR: %[[ARR_0:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!cir.ptr<!s32i> x 1>> -> !cir.ptr<!cir.ptr<!s32i>>
 // CIR: cir.store{{.*}} %[[ARR_0]], %[[ARR_PTR]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!cir.ptr<!cir.ptr<!s32i>>>
 // CIR: %[[ONE:.*]] = cir.const #cir.int<1> : !s64i
 // CIR: %[[ARR_END:.*]] = cir.ptr_stride(%[[ARR_0]] : !cir.ptr<!cir.ptr<!s32i>>, %[[ONE]] : !s64i), !cir.ptr<!cir.ptr<!s32i>>
@@ -497,7 +497,7 @@ void func9(int arr[10][5]) {
 // CIR:  %[[IDX_1:.*]] = cir.const #cir.int<1> : !s32i
 // CIR:  %[[TMP_1:.*]] = cir.load{{.*}} %[[ARR]] : !cir.ptr<!cir.ptr<!cir.array<!s32i x 5>>>, !cir.ptr<!cir.array<!s32i x 5>>
 // CIR:  %[[ARR_1:.*]] = cir.ptr_stride(%[[TMP_1]] : !cir.ptr<!cir.array<!s32i x 5>>, %[[IDX_1]] : !s32i), !cir.ptr<!cir.array<!s32i x 5>>
-// CIR:  %[[ARR_1_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR_1]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CIR:  %[[ARR_1_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR_1]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CIR:  %[[ARR_1_2:.*]] = cir.ptr_stride(%[[ARR_1_PTR]] : !cir.ptr<!s32i>, %[[IDX]] : !s32i), !cir.ptr<!s32i>
 // CIR:  %[[TMP_2:.*]] = cir.load{{.*}} %[[ARR_1_2]] : !cir.ptr<!s32i>, !s32i
 // CIR:  cir.store{{.*}} %[[TMP_2]], %[[INIT]] : !s32i, !cir.ptr<!s32i>
@@ -581,7 +581,7 @@ void array_with_complex_elements() {
 }
 
 // CIR: %[[ARR_ADDR:.*]] = cir.alloca !cir.array<!cir.complex<!cir.float> x 2>, !cir.ptr<!cir.array<!cir.complex<!cir.float> x 2>>, ["arr", init]
-// CIR: %[[ARR_0:.*]] = cir.cast(array_to_ptrdecay, %[[ARR_ADDR]] : !cir.ptr<!cir.array<!cir.complex<!cir.float> x 2>>), !cir.ptr<!cir.complex<!cir.float>>
+// CIR: %[[ARR_0:.*]] = cir.cast array_to_ptrdecay %[[ARR_ADDR]] : !cir.ptr<!cir.array<!cir.complex<!cir.float> x 2>> -> !cir.ptr<!cir.complex<!cir.float>>
 // CIR: %[[CONST_COMPLEX_0:.*]] = cir.const #cir.const_complex<#cir.fp<1.100000e+00> : !cir.float, #cir.fp<2.200000e+00> : !cir.float> : !cir.complex<!cir.float>
 // CIR: cir.store{{.*}} %[[CONST_COMPLEX_0]], %[[ARR_0]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
 // CIR: %[[IDX_1:.*]] = cir.const #cir.int<1> : !s64i
diff --git a/clang/test/CIR/CodeGen/assign-operator.cpp b/clang/test/CIR/CodeGen/assign-operator.cpp
index 3e509f59368b6..1089d4b6e69f8 100644
--- a/clang/test/CIR/CodeGen/assign-operator.cpp
+++ b/clang/test/CIR/CodeGen/assign-operator.cpp
@@ -17,7 +17,7 @@ void a() {
 // CIR: cir.func{{.*}} @_Z1av()
 // CIR:   %[[A_ADDR:.*]] = cir.alloca !rec_x, !cir.ptr<!rec_x>, ["a"]
 // CIR:   %[[ONE:.*]] = cir.const #cir.int<1> : !u32i
-// CIR:   %[[ONE_CAST:.*]] = cir.cast(integral, %[[ONE]] : !u32i), !s32i
+// CIR:   %[[ONE_CAST:.*]] = cir.cast integral %[[ONE]] : !u32i -> !s32i
 // CIR:   %[[RET:.*]] = cir.call @_ZN1xaSEi(%[[A_ADDR]], %[[ONE_CAST]]) : (!cir.ptr<!rec_x>, !s32i) -> !s32i
 
 // LLVM: define{{.*}} @_Z1av()
@@ -75,10 +75,10 @@ void copy_c(C &c1, C &c2) {
 // CIR:   %[[A_MEMBER_2:.*]] = cir.get_member %[[ARG1_LOAD]][0] {name = "a"}
 // CIR:   %[[C_A:.*]] = cir.call @_ZN1AaSERKS_(%[[A_MEMBER]], %[[A_MEMBER_2]])
 // CIR:   %[[B_MEMBER:.*]] = cir.get_member %[[THIS]][1] {name = "b"}
-// CIR:   %[[B_VOID_PTR:.*]] = cir.cast(bitcast, %[[B_MEMBER]] : !cir.ptr<!cir.array<!rec_B x 16>>), !cir.ptr<!void>
+// CIR:   %[[B_VOID_PTR:.*]] = cir.cast bitcast %[[B_MEMBER]] : !cir.ptr<!cir.array<!rec_B x 16>> -> !cir.ptr<!void>
 // CIR:   %[[RET_LOAD:.*]] = cir.load %[[ARG1_ADDR]]
 // CIR:   %[[B_MEMBER_2:.*]] = cir.get_member %[[RET_LOAD]][1] {name = "b"}
-// CIR:   %[[B_VOID_PTR_2:.*]] = cir.cast(bitcast, %[[B_MEMBER_2]] : !cir.ptr<!cir.array<!rec_B x 16>>), !cir.ptr<!void>
+// CIR:   %[[B_VOID_PTR_2:.*]] = cir.cast bitcast %[[B_MEMBER_2]] : !cir.ptr<!cir.array<!rec_B x 16>> -> !cir.ptr<!void>
 // CIR:   %[[SIZE:.*]] = cir.const #cir.int<64> : !u64i
 // CIR:   %[[COUNT:.*]] = cir.call @memcpy(%[[B_VOID_PTR]], %[[B_VOID_PTR_2]], %[[SIZE]])
 // CIR:   cir.store %[[THIS]], %[[RET_ADDR]]
diff --git a/clang/test/CIR/CodeGen/basic.c b/clang/test/CIR/CodeGen/basic.c
index 2c3c5b0f22a5c..9268615bc9fb0 100644
--- a/clang/test/CIR/CodeGen/basic.c
+++ b/clang/test/CIR/CodeGen/basic.c
@@ -296,7 +296,7 @@ size_type max_size(void) {
 // CIR:   %0 = cir.alloca !u64i, !cir.ptr<!u64i>, ["__retval"] {alignment = 8 : i64}
 // CIR:   %1 = cir.const #cir.int<0> : !s32i
 // CIR:   %2 = cir.unary(not, %1) : !s32i, !s32i
-// CIR:   %3 = cir.cast(integral, %2 : !s32i), !u64i
+// CIR:   %3 = cir.cast integral %2 : !s32i -> !u64i
 // CIR:   %4 = cir.const #cir.int<8> : !u64i
 // CIR:   %5 = cir.binop(div, %3, %4) : !u64i
 
diff --git a/clang/test/CIR/CodeGen/basic.cpp b/clang/test/CIR/CodeGen/basic.cpp
index fe6dd938f0faf..af8de6fff047a 100644
--- a/clang/test/CIR/CodeGen/basic.cpp
+++ b/clang/test/CIR/CodeGen/basic.cpp
@@ -124,7 +124,7 @@ size_type max_size() {
 // CHECK:   %0 = cir.alloca !u64i, !cir.ptr<!u64i>, ["__retval"] {alignment = 8 : i64}
 // CHECK:   %1 = cir.const #cir.int<0> : !s32i
 // CHECK:   %2 = cir.unary(not, %1) : !s32i, !s32i
-// CHECK:   %3 = cir.cast(integral, %2 : !s32i), !u64i
+// CHECK:   %3 = cir.cast integral %2 : !s32i -> !u64i
 // CHECK:   %4 = cir.const #cir.int<8> : !u64i
 // CHECK:   %5 = cir.binop(div, %3, %4) : !u64i
 // CHECK:   cir.store{{.*}} %5, %0 : !u64i, !cir.ptr<!u64i>
diff --git a/clang/test/CIR/CodeGen/binassign.c b/clang/test/CIR/CodeGen/binassign.c
index 541b50a664c0e..65bea4df7d837 100644
--- a/clang/test/CIR/CodeGen/binassign.c
+++ b/clang/test/CIR/CodeGen/binassign.c
@@ -25,7 +25,7 @@ void binary_assign(void) {
 // CIR:         %[[TRUE:.*]] = cir.const #true
 // CIR:         cir.store{{.*}} %[[TRUE]], %[[B]] : !cir.bool, !cir.ptr<!cir.bool>
 // CIR:         %[[CHAR_INI_INIT:.*]] = cir.const #cir.int<65> : !s32i
-// CIR:         %[[CHAR_VAL:.*]] = cir.cast(integral, %[[CHAR_INI_INIT]] : !s32i), !s8i
+// CIR:         %[[CHAR_VAL:.*]] = cir.cast integral %[[CHAR_INI_INIT]] : !s32i -> !s8i
 // CIR:         cir.store{{.*}} %[[CHAR_VAL]], %[[C]] : !s8i, !cir.ptr<!s8i>
 // CIR:         %[[FLOAT_VAL:.*]] = cir.const #cir.fp<3.140000e+00> : !cir.float
 // CIR:         cir.store{{.*}} %[[FLOAT_VAL]], %[[F]] : !cir.float, !cir.ptr<!cir.float>
diff --git a/clang/test/CIR/CodeGen/binop.c b/clang/test/CIR/CodeGen/binop.c
index 280fd29b067f9..4427e4b605297 100644
--- a/clang/test/CIR/CodeGen/binop.c
+++ b/clang/test/CIR/CodeGen/binop.c
@@ -5,9 +5,9 @@ void conditionalResultIimplicitCast(int a, int b, float f) {
   // Should implicit cast back to int.
   int x = a && b;
   // CHECK: %[[#INT:]] = cir.ternary
-  // CHECK: %{{.+}} = cir.cast(bool_to_int, %[[#INT]] : !cir.bool), !s32i
+  // CHECK: %{{.+}} = cir.cast bool_to_int %[[#INT]] : !cir.bool -> !s32i
   float y = f && f;
   // CHECK: %[[#BOOL:]] = cir.ternary
-  // CHECK: %[[#INT:]] = cir.cast(bool_to_int, %[[#BOOL]] : !cir.bool), !s32i
-  // CHECK: %{{.+}} = cir.cast(int_to_float, %[[#INT]] : !s32i), !cir.float
+  // CHECK: %[[#INT:]] = cir.cast bool_to_int %[[#BOOL]] : !cir.bool -> !s32i
+  // CHECK: %{{.+}} = cir.cast int_to_float %[[#INT]] : !s32i -> !cir.float
 }
diff --git a/clang/test/CIR/CodeGen/binop.cpp b/clang/test/CIR/CodeGen/binop.cpp
index 847e81755939f..c1a432dbc2c32 100644
--- a/clang/test/CIR/CodeGen/binop.cpp
+++ b/clang/test/CIR/CodeGen/binop.cpp
@@ -337,13 +337,13 @@ void zext_shift_example(int a, unsigned char b) {
 
 // CIR: %[[A1:.*]] = cir.load{{.*}} %[[A_PTR]] : !cir.ptr<!s32i>, !s32i
 // CIR: %[[B1:.*]] = cir.load{{.*}} %[[B_PTR]] : !cir.ptr<!u8i>, !u8i
-// CIR: %[[B1_EXT:.*]] = cir.cast(integral, %[[B1]] : !u8i), !s32i
+// CIR: %[[B1_EXT:.*]] = cir.cast integral %[[B1]] : !u8i -> !s32i
 // CIR: %[[ASHR:.*]] = cir.shift(right, %[[A1]] : !s32i, %[[B1_EXT]] : !s32i) -> !s32i
 // CIR: cir.store{{.*}} %[[ASHR]], %[[X_PTR]] : !s32i, !cir.ptr<!s32i>
 
 // CIR: %[[A2:.*]] = cir.load{{.*}} %[[A_PTR]] : !cir.ptr<!s32i>, !s32i
 // CIR: %[[B2:.*]] = cir.load{{.*}} %[[B_PTR]] : !cir.ptr<!u8i>, !u8i
-// CIR: %[[B2_EXT:.*]] = cir.cast(integral, %[[B2]] : !u8i), !s32i
+// CIR: %[[B2_EXT:.*]] = cir.cast integral %[[B2]] : !u8i -> !s32i
 // CIR: %[[SHL:.*]] = cir.shift(left, %[[A2]] : !s32i, %[[B2_EXT]] : !s32i) -> !s32i
 // CIR: cir.store{{.*}} %[[SHL]], %[[X_PTR]] : !s32i, !cir.ptr<!s32i>
 
@@ -409,13 +409,13 @@ void sext_shift_example(int a, signed char b) {
 
 // CIR: %[[A1:.*]] = cir.load{{.*}} %[[A_PTR]] : !cir.ptr<!s32i>, !s32i
 // CIR: %[[B1:.*]] = cir.load{{.*}} %[[B_PTR]] : !cir.ptr<!s8i>, !s8i
-// CIR: %[[B1_EXT:.*]] = cir.cast(integral, %[[B1]] : !s8i), !s32i
+// CIR: %[[B1_EXT:.*]] = cir.cast integral %[[B1]] : !s8i -> !s32i
 // CIR: %[[ASHR:.*]] = cir.shift(right, %[[A1]] : !s32i, %[[B1_EXT]] : !s32i) -> !s32i
 // CIR: cir.store{{.*}} %[[ASHR]], %[[X_PTR]] : !s32i, !cir.ptr<!s32i>
 
 // CIR: %[[A2:.*]] = cir.load{{.*}} %[[A_PTR]] : !cir.ptr<!s32i>, !s32i
 // CIR: %[[B2:.*]] = cir.load{{.*}} %[[B_PTR]] : !cir.ptr<!s8i>, !s8i
-// CIR: %[[B2_EXT:.*]] = cir.cast(integral, %[[B2]] : !s8i), !s32i
+// CIR: %[[B2_EXT:.*]] = cir.cast integral %[[B2]] : !s8i -> !s32i
 // CIR: %[[SHL:.*]] = cir.shift(left, %[[A2]] : !s32i, %[[B2_EXT]] : !s32i) -> !s32i
 // CIR: cir.store{{.*}} %[[SHL]], %[[X_PTR]] : !s32i, !cir.ptr<!s32i>
 
@@ -481,13 +481,13 @@ void long_shift_example(long long a, short b) {
 
 // CIR: %[[A1:.*]] = cir.load{{.*}} %[[A_PTR]] : !cir.ptr<!s64i>, !s64i
 // CIR: %[[B1:.*]] = cir.load{{.*}} %[[B_PTR]] : !cir.ptr<!s16i>, !s16i
-// CIR: %[[B1_EXT:.*]] = cir.cast(integral, %[[B1]] : !s16i), !s32i
+// CIR: %[[B1_EXT:.*]] = cir.cast integral %[[B1]] : !s16i -> !s32i
 // CIR: %[[ASHR:.*]] = cir.shift(right, %[[A1]] : !s64i, %[[B1_EXT]] : !s32i) -> !s64i
 // CIR: cir.store{{.*}} %[[ASHR]], %[[X_PTR]] : !s64i, !cir.ptr<!s64i>
 
 // CIR: %[[A2:.*]] = cir.load{{.*}} %[[A_PTR]] : !cir.ptr<!s64i>, !s64i
 // CIR: %[[B2:.*]] = cir.load{{.*}} %[[B_PTR]] : !cir.ptr<!s16i>, !s16i
-// CIR: %[[B2_EXT:.*]] = cir.cast(integral, %[[B2]] : !s16i), !s32i
+// CIR: %[[B2_EXT:.*]] = cir.cast integral %[[B2]] : !s16i -> !s32i
 // CIR: %[[SHL:.*]] = cir.shift(left, %[[A2]] : !s64i, %[[B2_EXT]] : !s32i) -> !s64i
 // CIR: cir.store{{.*}} %[[SHL]], %[[X_PTR]] : !s64i, !cir.ptr<!s64i>
 
diff --git a/clang/test/CIR/CodeGen/builtin_bit.cpp b/clang/test/CIR/CodeGen/builtin_bit.cpp
index 8b9a187e799ed..32a53d883a170 100644
--- a/clang/test/CIR/CodeGen/builtin_bit.cpp
+++ b/clang/test/CIR/CodeGen/builtin_bit.cpp
@@ -34,7 +34,7 @@ int test_builtin_clrsbl(long x) {
 
 // CIR-LABEL: _Z19test_builtin_clrsbll
 // CIR:         [[TMP:%.+]] = cir.clrsb %{{.+}} : !s64i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !s64i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !s64i -> !s32i
 
 // LLVM-LABEL: _Z19test_builtin_clrsbll
 // LLVM:         %[[X:.+]] = load i64, ptr %{{.+}}, align 8
@@ -58,7 +58,7 @@ int test_builtin_clrsbll(long long x) {
 
 // CIR-LABEL: _Z20test_builtin_clrsbllx
 // CIR:         [[TMP:%.+]] = cir.clrsb %{{.+}} : !s64i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !s64i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !s64i -> !s32i
 
 // LLVM-LABEL: _Z20test_builtin_clrsbllx
 // LLVM:         %[[X:.+]] = load i64, ptr %{{.+}}, align 8
@@ -82,7 +82,7 @@ int test_builtin_ctzs(unsigned short x) {
 
 // CIR-LABEL: _Z17test_builtin_ctzst
 // CIR:         [[TMP:%.+]] = cir.ctz %{{.+}} poison_zero : !u16i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u16i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u16i -> !s32i
 
 // LLVM-LABEL: _Z17test_builtin_ctzst
 // LLVM:         %{{.+}} = call i16 @llvm.cttz.i16(i16 %{{.+}}, i1 true)
@@ -96,7 +96,7 @@ int test_builtin_ctz(unsigned x) {
 
 // CIR-LABEL: _Z16test_builtin_ctzj
 // CIR:         [[TMP:%.+]] = cir.ctz %{{.+}} poison_zero : !u32i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u32i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
 
 // LLVM-LABEL: _Z16test_builtin_ctzj
 // LLVM:         %{{.+}} = call i32 @llvm.cttz.i32(i32 %{{.+}}, i1 true)
@@ -110,7 +110,7 @@ int test_builtin_ctzl(unsigned long x) {
 
 // CIR-LABEL: _Z17test_builtin_ctzlm
 // CIR:         [[TMP:%.+]] = cir.ctz %{{.+}} poison_zero : !u64i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u64i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
 
 // LLVM-LABEL: _Z17test_builtin_ctzlm
 // LLVM:         %{{.+}} = call i64 @llvm.cttz.i64(i64 %{{.+}}, i1 true)
@@ -124,7 +124,7 @@ int test_builtin_ctzll(unsigned long long x) {
 
 // CIR-LABEL: _Z18test_builtin_ctzlly
 // CIR:         [[TMP:%.+]] = cir.ctz %{{.+}} poison_zero : !u64i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u64i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
 
 // LLVM-LABEL: _Z18test_builtin_ctzlly
 // LLVM:         %{{.+}} = call i64 @llvm.cttz.i64(i64 %{{.+}}, i1 true)
@@ -138,7 +138,7 @@ int test_builtin_ctzg(unsigned x) {
 
 // CIR-LABEL: _Z17test_builtin_ctzgj
 // CIR:         [[TMP:%.+]] = cir.ctz %{{.+}} poison_zero : !u32i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u32i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
 
 // LLVM-LABEL: _Z17test_builtin_ctzgj
 // LLVM:         %{{.+}} = call i32 @llvm.cttz.i32(i32 %{{.+}}, i1 true)
@@ -152,7 +152,7 @@ int test_builtin_clzs(unsigned short x) {
 
 // CIR-LABEL: _Z17test_builtin_clzst
 // CIR:         [[TMP:%.+]] = cir.clz %{{.+}} poison_zero : !u16i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u16i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u16i -> !s32i
 
 // LLVM-LABEL: _Z17test_builtin_clzst
 // LLVM:         %{{.+}} = call i16 @llvm.ctlz.i16(i16 %{{.+}}, i1 true)
@@ -166,7 +166,7 @@ int test_builtin_clz(unsigned x) {
 
 // CIR-LABEL: _Z16test_builtin_clzj
 // CIR:         [[TMP:%.+]] = cir.clz %{{.+}} poison_zero : !u32i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u32i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
 
 // LLVM-LABEL: _Z16test_builtin_clzj
 // LLVM:         %{{.+}} = call i32 @llvm.ctlz.i32(i32 %{{.+}}, i1 true)
@@ -180,7 +180,7 @@ int test_builtin_clzl(unsigned long x) {
 
 // CIR-LABEL: _Z17test_builtin_clzlm
 // CIR:         [[TMP:%.+]] = cir.clz %{{.+}} poison_zero : !u64i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u64i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
 
 // LLVM-LABEL: _Z17test_builtin_clzlm
 // LLVM:         %{{.+}} = call i64 @llvm.ctlz.i64(i64 %{{.+}}, i1 true)
@@ -194,7 +194,7 @@ int test_builtin_clzll(unsigned long long x) {
 
 // CIR-LABEL: _Z18test_builtin_clzlly
 // CIR:         [[TMP:%.+]] = cir.clz %{{.+}} poison_zero : !u64i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u64i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
 
 // LLVM-LABEL: _Z18test_builtin_clzlly
 // LLVM:         %{{.+}} = call i64 @llvm.ctlz.i64(i64 %{{.+}}, i1 true)
@@ -208,7 +208,7 @@ int test_builtin_clzg(unsigned x) {
 
 // CIR-LABEL: _Z17test_builtin_clzgj
 // CIR:         [[TMP:%.+]] = cir.clz %{{.+}} poison_zero : !u32i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u32i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
 
 // LLVM-LABEL: _Z17test_builtin_clzgj
 // LLVM:         %{{.+}} = call i32 @llvm.ctlz.i32(i32 %{{.+}}, i1 true)
@@ -294,7 +294,7 @@ int test_builtin_parity(unsigned x) {
 
 // CIR-LABEL: _Z19test_builtin_parityj
 // CIR:         [[TMP:%.+]] = cir.parity %{{.+}} : !u32i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u32i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
 
 // LLVM-LABEL: _Z19test_builtin_parityj
 // LLVM:         %[[X:.+]] = load i32, ptr %{{.+}}, align 4
@@ -312,7 +312,7 @@ int test_builtin_parityl(unsigned long x) {
 
 // CIR-LABEL: _Z20test_builtin_paritylm
 // CIR:         [[TMP:%.+]] = cir.parity %{{.+}} : !u64i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u64i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
 
 // LLVM-LABEL: _Z20test_builtin_paritylm
 // LLVM:         %[[X:.+]] = load i64, ptr %{{.+}}, align 8
@@ -330,7 +330,7 @@ int test_builtin_parityll(unsigned long long x) {
 
 // CIR-LABEL: _Z21test_builtin_paritylly
 // CIR:         [[TMP:%.+]] = cir.parity %{{.+}} : !u64i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u64i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
 
 // LLVM-LABEL: _Z21test_builtin_paritylly
 // LLVM:         %[[X:.+]] = load i64, ptr %{{.+}}, align 8
@@ -348,7 +348,7 @@ int test_builtin_popcount(unsigned x) {
 
 // CIR-LABEL: _Z21test_builtin_popcountj
 // CIR:         [[TMP:%.+]] = cir.popcount %{{.+}} : !u32i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u32i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
 
 // LLVM-LABEL: _Z21test_builtin_popcountj
 // LLVM:         %{{.+}} = call i32 @llvm.ctpop.i32(i32 %{{.+}})
@@ -362,7 +362,7 @@ int test_builtin_popcountl(unsigned long x) {
 
 // CIR-LABEL: _Z22test_builtin_popcountlm
 // CIR:         [[TMP:%.+]] = cir.popcount %{{.+}} : !u64i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u64i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
 
 // LLVM-LABEL: _Z22test_builtin_popcountlm
 // LLVM:         %{{.+}} = call i64 @llvm.ctpop.i64(i64 %{{.+}})
@@ -376,7 +376,7 @@ int test_builtin_popcountll(unsigned long long x) {
 
 // CIR-LABEL: _Z23test_builtin_popcountlly
 // CIR:         [[TMP:%.+]] = cir.popcount %{{.+}} : !u64i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u64i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
 
 // LLVM-LABEL: _Z23test_builtin_popcountlly
 // LLVM:         %{{.+}} = call i64 @llvm.ctpop.i64(i64 %{{.+}})
@@ -390,7 +390,7 @@ int test_builtin_popcountg(unsigned x) {
 
 // CIR-LABEL: _Z22test_builtin_popcountgj
 // CIR:         [[TMP:%.+]] = cir.popcount %{{.+}} : !u32i
-// CIR:         {{%.+}} = cir.cast(integral, [[TMP]] : !u32i), !s32i
+// CIR:         {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
 
 // LLVM-LABEL: _Z22test_builtin_popcountgj
 // LLVM:         %{{.+}} = call i32 @llvm.ctpop.i32(i32 %{{.+}})
diff --git a/clang/test/CIR/CodeGen/builtin_call.cpp b/clang/test/CIR/CodeGen/builtin_call.cpp
index 853d894a3311b..a30df97250d19 100644
--- a/clang/test/CIR/CodeGen/builtin_call.cpp
+++ b/clang/test/CIR/CodeGen/builtin_call.cpp
@@ -165,9 +165,9 @@ void expect(int x, int y) {
 
 // CIR-LABEL: cir.func{{.*}} @_Z6expectii
 // CIR:         %[[X:.+]] = cir.load align(4) %{{.+}} : !cir.ptr<!s32i>, !s32i
-// CIR-NEXT:    %[[X_LONG:.+]] = cir.cast(integral, %[[X]] : !s32i), !s64i
+// CIR-NEXT:    %[[X_LONG:.+]] = cir.cast integral %[[X]] : !s32i -> !s64i
 // CIR-NEXT:    %[[Y:.+]] = cir.load align(4) %{{.+}} : !cir.ptr<!s32i>, !s32i
-// CIR-NEXT:    %[[Y_LONG:.+]] = cir.cast(integral, %[[Y]] : !s32i), !s64i
+// CIR-NEXT:    %[[Y_LONG:.+]] = cir.cast integral %[[Y]] : !s32i -> !s64i
 // CIR-NEXT:    %{{.+}} = cir.expect(%[[X_LONG]], %[[Y_LONG]]) : !s64i
 // CIR:       }
 
@@ -185,9 +185,9 @@ void expect_prob(int x, int y) {
 
 // CIR-LABEL: cir.func{{.*}} @_Z11expect_probii
 // CIR:         %[[X:.+]] = cir.load align(4) %{{.+}} : !cir.ptr<!s32i>, !s32i
-// CIR-NEXT:    %[[X_LONG:.+]] = cir.cast(integral, %[[X]] : !s32i), !s64i
+// CIR-NEXT:    %[[X_LONG:.+]] = cir.cast integral %[[X]] : !s32i -> !s64i
 // CIR-NEXT:    %[[Y:.+]] = cir.load align(4) %{{.+}} : !cir.ptr<!s32i>, !s32i
-// CIR-NEXT:    %[[Y_LONG:.+]] = cir.cast(integral, %[[Y]] : !s32i), !s64i
+// CIR-NEXT:    %[[Y_LONG:.+]] = cir.cast integral %[[Y]] : !s32i -> !s64i
 // CIR-NEXT:    %{{.+}} = cir.expect(%[[X_LONG]], %[[Y_LONG]], 2.500000e-01) : !s64i
 // CIR:       }
 
diff --git a/clang/test/CIR/CodeGen/builtin_printf.cpp b/clang/test/CIR/CodeGen/builtin_printf.cpp
index 80875c349bfcf..898984a6c12d3 100644
--- a/clang/test/CIR/CodeGen/builtin_printf.cpp
+++ b/clang/test/CIR/CodeGen/builtin_printf.cpp
@@ -28,11 +28,11 @@ void func(char const * const str, int i) {
 // CIR:   %[[null_ptr:.+]] = cir.const #cir.ptr<null> : !cir.ptr<!s8i>
 // CIR:   %[[printf_result1:.+]] = cir.call @printf(%[[null_ptr]]) nothrow : (!cir.ptr<!s8i>) -> !s32i
 // CIR:   %[[str_fmt_global:.+]] = cir.get_global @".str" : !cir.ptr<!cir.array<!s8i x 3>>
-// CIR:   %[[str_fmt_ptr:.+]] = cir.cast(array_to_ptrdecay, %[[str_fmt_global]] : !cir.ptr<!cir.array<!s8i x 3>>), !cir.ptr<!s8i>
+// CIR:   %[[str_fmt_ptr:.+]] = cir.cast array_to_ptrdecay %[[str_fmt_global]] : !cir.ptr<!cir.array<!s8i x 3>> -> !cir.ptr<!s8i>
 // CIR:   %[[str_val:.+]] = cir.load{{.*}} %[[str_ptr]] : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
 // CIR:   %[[printf_result2:.+]] = cir.call @printf(%[[str_fmt_ptr]], %[[str_val]]) nothrow : (!cir.ptr<!s8i>, !cir.ptr<!s8i>) -> !s32i
 // CIR:   %[[full_fmt_global:.+]] = cir.get_global @".str.1" : !cir.ptr<!cir.array<!s8i x 7>>
-// CIR:   %[[full_fmt_ptr:.+]] = cir.cast(array_to_ptrdecay, %[[full_fmt_global]] : !cir.ptr<!cir.array<!s8i x 7>>), !cir.ptr<!s8i>
+// CIR:   %[[full_fmt_ptr:.+]] = cir.cast array_to_ptrdecay %[[full_fmt_global]] : !cir.ptr<!cir.array<!s8i x 7>> -> !cir.ptr<!s8i>
 // CIR:   %[[str_val2:.+]] = cir.load{{.*}} %[[str_ptr]] : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
 // CIR:   %[[i_val:.+]] = cir.load{{.*}} %[[i_ptr]] : !cir.ptr<!s32i>, !s32i
 // CIR:   %[[printf_result3:.+]] = cir.call @printf(%[[full_fmt_ptr]], %[[str_val2]], %[[i_val]]) nothrow : (!cir.ptr<!s8i>, !cir.ptr<!s8i>, !s32i) -> !s32i
diff --git a/clang/test/CIR/CodeGen/cast.cpp b/clang/test/CIR/CodeGen/cast.cpp
index caf6de7c7d485..7afa955cf3bcf 100644
--- a/clang/test/CIR/CodeGen/cast.cpp
+++ b/clang/test/CIR/CodeGen/cast.cpp
@@ -12,7 +12,7 @@ unsigned char cxxstaticcast_0(unsigned int x) {
 // CIR:    %[[RV:[0-9]+]] = cir.alloca !u8i, !cir.ptr<!u8i>, ["__retval"] {alignment = 1 : i64}
 // CIR:    cir.store %arg0, %[[XPTR]] : !u32i, !cir.ptr<!u32i>
 // CIR:    %[[XVAL:[0-9]+]] = cir.load{{.*}} %[[XPTR]] : !cir.ptr<!u32i>, !u32i
-// CIR:    %[[CASTED:[0-9]+]] = cir.cast(integral, %[[XVAL]] : !u32i), !u8i
+// CIR:    %[[CASTED:[0-9]+]] = cir.cast integral %[[XVAL]] : !u32i -> !u8i
 // CIR:    cir.store %[[CASTED]], %[[RV]] : !u8i, !cir.ptr<!u8i>
 // CIR:    %[[R:[0-9]+]] = cir.load{{.*}} %1 : !cir.ptr<!u8i>, !u8i
 // CIR:    cir.return %[[R]] : !u8i
@@ -30,55 +30,55 @@ int cStyleCasts_0(unsigned x1, int x2, float x3, short x4, double x5) {
 // LLVM: define{{.*}} i32 @_Z13cStyleCasts_0jifsd
 
   char a = (char)x1; // truncate
-  // CIR: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !u32i), !s8i
+  // CIR: %{{[0-9]+}} = cir.cast integral %{{[0-9]+}} : !u32i -> !s8i
   // LLVM: %{{[0-9]+}} = trunc i32 %{{[0-9]+}} to i8
 
   short b = (short)x2; // truncate with sign
-  // CIR: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !s32i), !s16i
+  // CIR: %{{[0-9]+}} = cir.cast integral %{{[0-9]+}} : !s32i -> !s16i
   // LLVM: %{{[0-9]+}} = trunc i32 %{{[0-9]+}} to i16
 
   long long c = (long long)x1; // zero extend
-  // CIR: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !u32i), !s64i
+  // CIR: %{{[0-9]+}} = cir.cast integral %{{[0-9]+}} : !u32i -> !s64i
   // LLVM: %{{[0-9]+}} = zext i32 %{{[0-9]+}} to i64
 
   long long d = (long long)x2; // sign extend
-  // CIR: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !s32i), !s64i
+  // CIR: %{{[0-9]+}} = cir.cast integral %{{[0-9]+}} : !s32i -> !s64i
   // LLVM: %{{[0-9]+}} = sext i32 %{{[0-9]+}} to i64
 
   unsigned ui = (unsigned)x2; // sign drop
-  // CIR: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !s32i), !u32i
+  // CIR: %{{[0-9]+}} = cir.cast integral %{{[0-9]+}} : !s32i -> !u32i
 
   int si = (int)x1; // sign add
-  // CIR: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !u32i), !s32i
+  // CIR: %{{[0-9]+}} = cir.cast integral %{{[0-9]+}} : !u32i -> !s32i
 
   bool ib;
   int bi = (int)ib; // bool to int
-  // CIR: %{{[0-9]+}} = cir.cast(bool_to_int, %{{[0-9]+}} : !cir.bool), !s32i
+  // CIR: %{{[0-9]+}} = cir.cast bool_to_int %{{[0-9]+}} : !cir.bool -> !s32i
   // LLVM: %{{[0-9]+}} = zext i1 %{{[0-9]+}} to i32
 
   bool b2 = x2; // int to bool
-  // CIR: %{{[0-9]+}} = cir.cast(int_to_bool, %{{[0-9]+}} : !s32i), !cir.bool
+  // CIR: %{{[0-9]+}} = cir.cast int_to_bool %{{[0-9]+}} : !s32i -> !cir.bool
   // LLVM: %[[INTTOBOOL:[0-9]+]]  = icmp ne i32 %{{[0-9]+}}, 0
   // LLVM: zext i1 %[[INTTOBOOL]] to i8
 
   void *p;
   bool b3 = p; // ptr to bool
-  // CIR: %{{[0-9]+}} = cir.cast(ptr_to_bool, %{{[0-9]+}} : !cir.ptr<!void>), !cir.bool
+  // CIR: %{{[0-9]+}} = cir.cast ptr_to_bool %{{[0-9]+}} : !cir.ptr<!void> -> !cir.bool
   // LLVM: %[[PTRTOBOOL:[0-9]+]]  = icmp ne ptr %{{[0-9]+}}, null
   // LLVM: zext i1 %[[PTRTOBOOL]] to i8
 
   float f;
   bool b4 = f; // float to bool
-  // CIR: %{{[0-9]+}} = cir.cast(float_to_bool, %{{[0-9]+}} : !cir.float), !cir.bool
+  // CIR: %{{[0-9]+}} = cir.cast float_to_bool %{{[0-9]+}} : !cir.float -> !cir.bool
   // LLVM: %{{[0-9]+}} = fcmp une float %{{[0-9]+}}, 0.000000e+00
   // LLVM: %{{[0-9]+}} = zext i1 %{{[0-9]+}} to i8
 
   double d2 = f; // float to double
-  // CIR: %{{[0-9]+}} = cir.cast(floating, %{{[0-9]+}} : !cir.float), !cir.double
+  // CIR: %{{[0-9]+}} = cir.cast floating %{{[0-9]+}} : !cir.float -> !cir.double
   // LLVM: %{{[0-9]+}} = fpext float %{{[0-9]+}} to double
 
   f = d2; // double to float
-  // CIR: %{{[0-9]+}} = cir.cast(floating, %{{[0-9]+}} : !cir.double), !cir.float
+  // CIR: %{{[0-9]+}} = cir.cast floating %{{[0-9]+}} : !cir.double -> !cir.float
   // LLVM: %{{[0-9]+}} = fptrunc double %{{[0-9]+}} to float
 
   return 0;
@@ -93,7 +93,7 @@ bool cptr(void *d) {
 // CIR:   %[[DPTR:[0-9]+]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["d", init] {alignment = 8 : i64}
 
 // CIR:   %[[DVAL:[0-9]+]] = cir.load{{.*}} %[[DPTR]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
-// CIR:   %{{[0-9]+}} = cir.cast(ptr_to_bool, %[[DVAL]] : !cir.ptr<!void>), !cir.bool
+// CIR:   %{{[0-9]+}} = cir.cast ptr_to_bool %[[DVAL]] : !cir.ptr<!void> -> !cir.bool
 
 // LLVM-LABEL: define{{.*}} i1 @_Z4cptrPv(ptr %0)
 // LLVM:         %[[ARG_STORAGE:.*]] = alloca ptr, i64 1
@@ -127,7 +127,7 @@ void bitcast() {
 }
 
 // CIR: %[[D_VEC:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<2 x !cir.double>>, !cir.vector<2 x !cir.double>
-// CIR: %[[I_VEC:.*]] = cir.cast(bitcast, %[[D_VEC]] : !cir.vector<2 x !cir.double>), !cir.vector<4 x !s32i>
+// CIR: %[[I_VEC:.*]] = cir.cast bitcast %[[D_VEC]] : !cir.vector<2 x !cir.double> -> !cir.vector<4 x !s32i>
 
 // LLVM: %[[D_VEC:.*]] = load <2 x double>, ptr {{.*}}, align 16
 // LLVM: %[[I_VEC:.*]] = bitcast <2 x double> %[[D_VEC]] to <4 x i32>
diff --git a/clang/test/CIR/CodeGen/cmp.cpp b/clang/test/CIR/CodeGen/cmp.cpp
index 75c8cda0c3603..7e32d16e88d57 100644
--- a/clang/test/CIR/CodeGen/cmp.cpp
+++ b/clang/test/CIR/CodeGen/cmp.cpp
@@ -407,9 +407,9 @@ void bool_cmp(bool a, bool b) {
 // CIR: %[[X_PTR:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["x", init]
 
 // CIR: %[[A1:.*]] = cir.load{{.*}} %[[A_PTR]] : !cir.ptr<!cir.bool>, !cir.bool
-// CIR: %[[A1_INT:.*]] = cir.cast(bool_to_int, %[[A1]] : !cir.bool), !s32i
+// CIR: %[[A1_INT:.*]] = cir.cast bool_to_int %[[A1]] : !cir.bool -> !s32i
 // CIR: %[[B1:.*]] = cir.load{{.*}} %[[B_PTR]] : !cir.ptr<!cir.bool>, !cir.bool
-// CIR: %[[B1_INT:.*]] = cir.cast(bool_to_int, %[[B1]] : !cir.bool), !s32i
+// CIR: %[[B1_INT:.*]] = cir.cast bool_to_int %[[B1]] : !cir.bool -> !s32i
 // CIR: %{{.*}} = cir.cmp(gt, %[[A1_INT]], %[[B1_INT]]) : !s32i, !cir.bool
 // CIR: cir.store{{.*}} {{.*}}, %[[X_PTR]] : !cir.bool, !cir.ptr<!cir.bool>
 
diff --git a/clang/test/CIR/CodeGen/comma.c b/clang/test/CIR/CodeGen/comma.c
index a1479b85d3f04..cc26a3f200664 100644
--- a/clang/test/CIR/CodeGen/comma.c
+++ b/clang/test/CIR/CodeGen/comma.c
@@ -24,7 +24,7 @@ void comma(void) {
 // CIR:         %[[TRUE:.*]] = cir.const #true
 // CIR:         cir.store{{.*}} %[[TRUE]], %[[B]] : !cir.bool, !cir.ptr<!cir.bool>
 // CIR:         %[[CHAR_INI_INIT:.*]] = cir.const #cir.int<65> : !s32i
-// CIR:         %[[CHAR_VAL:.*]] = cir.cast(integral, %[[CHAR_INI_INIT]] : !s32i), !s8i
+// CIR:         %[[CHAR_VAL:.*]] = cir.cast integral %[[CHAR_INI_INIT]] : !s32i -> !s8i
 // CIR:         cir.store{{.*}} %[[CHAR_VAL]], %[[C]] : !s8i, !cir.ptr<!s8i>
 // CIR:         %[[FLOAT_VAL:.*]] = cir.const #cir.fp<3.140000e+00> : !cir.float
 // CIR:         cir.store{{.*}} %[[FLOAT_VAL]], %[[F]] : !cir.float, !cir.ptr<!cir.float>
diff --git a/clang/test/CIR/CodeGen/complex-cast.cpp b/clang/test/CIR/CodeGen/complex-cast.cpp
index a8f51cd627f9d..5dc08eb414a5b 100644
--- a/clang/test/CIR/CodeGen/complex-cast.cpp
+++ b/clang/test/CIR/CodeGen/complex-cast.cpp
@@ -20,7 +20,7 @@ void scalar_to_complex() {
   ci = sd;
 }
 
-// CIR-BEFORE: %[[FP_TO_COMPLEX:.*]] = cir.cast(float_to_complex, %{{.*}} : !cir.double), !cir.complex<!cir.double>
+// CIR-BEFORE: %[[FP_TO_COMPLEX:.*]] = cir.cast float_to_complex %{{.*}} : !cir.double -> !cir.complex<!cir.double>
 
 //      CIR-AFTER: %[[REAL:.*]] = cir.load{{.*}}  %{{.*}} : !cir.ptr<!cir.double>, !cir.double
 // CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.double
@@ -35,7 +35,7 @@ void scalar_to_complex() {
 // OGCG: store double %[[REAL]], ptr {{.*}}, align 8
 // OGCG: store double 0.000000e+00, ptr getelementptr inbounds nuw ({ double, double }, ptr @cd, i32 0, i32 1), align 8
 
-// CIR-BEFORE: %[[INT_TO_COMPLEX:.*]] = cir.cast(int_to_complex, %{{.*}} : !s32i), !cir.complex<!s32i>
+// CIR-BEFORE: %[[INT_TO_COMPLEX:.*]] = cir.cast int_to_complex %{{.*}} : !s32i -> !cir.complex<!s32i>
 
 //      CIR-AFTER: %[[REAL:.*]] = cir.load{{.*}}  %{{.*}} : !cir.ptr<!s32i>, !s32i
 // CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.int<0> : !s32i
@@ -50,11 +50,11 @@ void scalar_to_complex() {
 // OGCG: store i32 %[[REAL]], ptr {{.*}}, align 4
 // OGCG: store i32 0, ptr getelementptr inbounds nuw ({ i32, i32 }, ptr @ci, i32 0, i32 1), align 4
 
-// CIR-BEFORE: %[[INT_TO_FP:.*]] = cir.cast(int_to_float, %{{.*}} : !s32i), !cir.double
-// CIR-BEFORE: %[[FP_TO_COMPLEX:.*]] = cir.cast(float_to_complex, %[[INT_TO_FP]] : !cir.double), !cir.complex<!cir.double>
+// CIR-BEFORE: %[[INT_TO_FP:.*]] = cir.cast int_to_float %{{.*}} : !s32i -> !cir.double
+// CIR-BEFORE: %[[FP_TO_COMPLEX:.*]] = cir.cast float_to_complex %[[INT_TO_FP]] : !cir.double -> !cir.complex<!cir.double>
 
 //      CIR-AFTER: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i
-// CIR-AFTER-NEXT: %[[REAL:.*]] = cir.cast(int_to_float, %[[TMP]] : !s32i), !cir.double
+// CIR-AFTER-NEXT: %[[REAL:.*]] = cir.cast int_to_float %[[TMP]] : !s32i -> !cir.double
 // CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.double
 // CIR-AFTER-NEXT: %{{.*}} = cir.complex.create %[[REAL]], %[[IMAG]] : !cir.double -> !cir.complex<!cir.double>
 
@@ -69,11 +69,11 @@ void scalar_to_complex() {
 // OGCG: store double %[[REAL]], ptr {{.*}}, align 8
 // OGCG: store double 0.000000e+00, ptr getelementptr inbounds nuw ({ double, double }, ptr {{.*}}, i32 0, i32 1), align 8
 
-// CIR-BEFORE: %[[FP_TO_INT:.*]] = cir.cast(float_to_int, %{{.*}} : !cir.double), !s32i
-// CIR-BEFORE: %[[INT_TO_COMPLEX:.*]] = cir.cast(int_to_complex, %[[FP_TO_INT]] : !s32i), !cir.complex<!s32i>
+// CIR-BEFORE: %[[FP_TO_INT:.*]] = cir.cast float_to_int %{{.*}} : !cir.double -> !s32i
+// CIR-BEFORE: %[[INT_TO_COMPLEX:.*]] = cir.cast int_to_complex %[[FP_TO_INT]] : !s32i -> !cir.complex<!s32i>
 
 //      CIR-AFTER: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.double>, !cir.double
-// CIR-AFTER-NEXT: %[[REAL:.*]] = cir.cast(float_to_int, %[[TMP]] : !cir.double), !s32i
+// CIR-AFTER-NEXT: %[[REAL:.*]] = cir.cast float_to_int %[[TMP]] : !cir.double -> !s32i
 // CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.int<0> : !s32i
 // CIR-AFTER-NEXT: %{{.*}} = cir.complex.create %[[REAL]], %[[IMAG]] : !s32i -> !cir.complex<!s32i>
 
@@ -95,7 +95,7 @@ void scalar_to_complex_explicit() {
   ci = (int _Complex)sd;
 }
 
-// CIR-BEFORE: %[[FP_TO_COMPLEX:.*]] = cir.cast(float_to_complex, %{{.*}} : !cir.double), !cir.complex<!cir.double>
+// CIR-BEFORE: %[[FP_TO_COMPLEX:.*]] = cir.cast float_to_complex %{{.*}} : !cir.double -> !cir.complex<!cir.double>
 
 //      CIR-AFTER: %[[REAL:.*]] = cir.load{{.*}}  %{{.*}} : !cir.ptr<!cir.double>, !cir.double
 // CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.double
@@ -110,7 +110,7 @@ void scalar_to_complex_explicit() {
 // OGCG: store double %[[REAL]], ptr {{.*}}, align 8
 // OGCG: store double 0.000000e+00, ptr getelementptr inbounds nuw ({ double, double }, ptr @cd, i32 0, i32 1), align 8
 
-// CIR-BEFORE: %[[INT_TO_COMPLEX:.*]] = cir.cast(int_to_complex, %{{.*}} : !s32i), !cir.complex<!s32i>
+// CIR-BEFORE: %[[INT_TO_COMPLEX:.*]] = cir.cast int_to_complex %{{.*}} : !s32i -> !cir.complex<!s32i>
 
 //      CIR-AFTER: %[[REAL:.*]] = cir.load{{.*}}  %{{.*}} : !cir.ptr<!s32i>, !s32i
 // CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.int<0> : !s32i
@@ -125,11 +125,11 @@ void scalar_to_complex_explicit() {
 // OGCG: store i32 %[[REAL]], ptr {{.*}}, align 4
 // OGCG: store i32 0, ptr getelementptr inbounds nuw ({ i32, i32 }, ptr @ci, i32 0, i32 1), align 4
 
-// CIR-BEFORE: %[[INT_TO_FP:.*]] = cir.cast(int_to_float, %{{.*}} : !s32i), !cir.double
-// CIR-BEFORE: %[[FP_TO_COMPLEX:.*]] = cir.cast(float_to_complex, %[[INT_TO_FP]] : !cir.double), !cir.complex<!cir.double>
+// CIR-BEFORE: %[[INT_TO_FP:.*]] = cir.cast int_to_float %{{.*}} : !s32i -> !cir.double
+// CIR-BEFORE: %[[FP_TO_COMPLEX:.*]] = cir.cast float_to_complex %[[INT_TO_FP]] : !cir.double -> !cir.complex<!cir.double>
 
 //      CIR-AFTER: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i
-// CIR-AFTER-NEXT: %[[REAL:.*]] = cir.cast(int_to_float, %[[TMP]] : !s32i), !cir.double
+// CIR-AFTER-NEXT: %[[REAL:.*]] = cir.cast int_to_float %[[TMP]] : !s32i -> !cir.double
 // CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.double
 // CIR-AFTER-NEXT: %{{.*}} = cir.complex.create %[[REAL]], %[[IMAG]] : !cir.double -> !cir.complex<!cir.double>
 
@@ -144,11 +144,11 @@ void scalar_to_complex_explicit() {
 // OGCG: store double %[[REAL]], ptr {{.*}}, align 8
 // OGCG: store double 0.000000e+00, ptr getelementptr inbounds nuw ({ double, double }, ptr {{.*}}, i32 0, i32 1), align 8
 
-// CIR-BEFORE: %[[FP_TO_INT:.*]] = cir.cast(float_to_int, %{{.*}} : !cir.double), !s32i
-// CIR-BEFORE: %[[INT_TO_COMPLEX:.*]] = cir.cast(int_to_complex, %[[FP_TO_INT]] : !s32i), !cir.complex<!s32i>
+// CIR-BEFORE: %[[FP_TO_INT:.*]] = cir.cast float_to_int %{{.*}} : !cir.double -> !s32i
+// CIR-BEFORE: %[[INT_TO_COMPLEX:.*]] = cir.cast int_to_complex %[[FP_TO_INT]] : !s32i -> !cir.complex<!s32i>
 
 //      CIR-AFTER: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.double>, !cir.double
-// CIR-AFTER-NEXT: %[[REAL:.*]] = cir.cast(float_to_int, %[[TMP]] : !cir.double), !s32i
+// CIR-AFTER-NEXT: %[[REAL:.*]] = cir.cast float_to_int %[[TMP]] : !cir.double -> !s32i
 // CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.int<0> : !s32i
 // CIR-AFTER-NEXT: %{{.*}} = cir.complex.create %[[REAL]], %[[IMAG]] : !s32i -> !cir.complex<!s32i>
 
@@ -170,7 +170,7 @@ void complex_to_scalar() {
   si = (int)cd;
 }
 
-// CIR-BEFORE: %[[FP_TO_COMPLEX_REAL:.*]] = cir.cast(float_complex_to_real, %{{.*}} : !cir.complex<!cir.double>), !cir.double
+// CIR-BEFORE: %[[FP_TO_COMPLEX_REAL:.*]] = cir.cast float_complex_to_real %{{.*}} : !cir.complex<!cir.double> -> !cir.double
 
 // CIR-AFTER: %{{.*}} = cir.complex.real %{{.*}} : !cir.complex<!cir.double> -> !cir.double
 
@@ -180,7 +180,7 @@ void complex_to_scalar() {
 // OGCG: %[[REAL:.*]] = load double, ptr {{.*}}, align 8
 // OGCG: store double %[[REAL]], ptr {{.*}}, align 8
 
-// CIR-BEFORE: %[[INT_COMPLEX_TO_REAL:.*]] = cir.cast(int_complex_to_real, %{{.*}} : !cir.complex<!s32i>), !s32i
+// CIR-BEFORE: %[[INT_COMPLEX_TO_REAL:.*]] = cir.cast int_complex_to_real %{{.*}} : !cir.complex<!s32i> -> !s32i
 
 // CIR-AFTER: %{{.*}} = cir.complex.real %{{.*}} : !cir.complex<!s32i> -> !s32i
 
@@ -190,11 +190,11 @@ void complex_to_scalar() {
 // OGCG: %[[REAL:.*]] = load i32, ptr {{.*}}, align 4
 // OGCG: store i32 %[[REAL]], ptr {{.*}}, align 4
 
-// CIR-BEFORE: %[[INT_COMPLEX_TO_REAL:.*]] = cir.cast(int_complex_to_real, %{{.*}} : !cir.complex<!s32i>), !s32i
-// CIR-BEFORE: %[[INT_TO_FP:.*]] = cir.cast(int_to_float, %[[INT_COMPLEX_TO_REAL]] : !s32i), !cir.double
+// CIR-BEFORE: %[[INT_COMPLEX_TO_REAL:.*]] = cir.cast int_complex_to_real %{{.*}} : !cir.complex<!s32i> -> !s32i
+// CIR-BEFORE: %[[INT_TO_FP:.*]] = cir.cast int_to_float %[[INT_COMPLEX_TO_REAL]] : !s32i -> !cir.double
 
 //      CIR-AFTER: %[[REAL:.*]] = cir.complex.real %{{.*}} : !cir.complex<!s32i> -> !s32i
-// CIR-AFTER-NEXT: %{{.*}} = cir.cast(int_to_float, %[[REAL]] : !s32i), !cir.double
+// CIR-AFTER-NEXT: %{{.*}} = cir.cast int_to_float %[[REAL]] : !s32i -> !cir.double
 
 //      LLVM: %[[REAL:.*]] = extractvalue { i32, i32 } %{{.+}}, 0
 // LLVM-NEXT: %[[REAL_TO_DOUBLE:.*]] = sitofp i32 %[[REAL]] to double
@@ -204,11 +204,11 @@ void complex_to_scalar() {
 // OGCG: %[[INT_TO_FP:.*]] = sitofp i32 %[[REAL]] to double
 // OGCG: store double %[[INT_TO_FP]], ptr {{.*}}, align 8
 
-// CIR-BEFORE: %[[FP_TO_COMPLEX_REAL:.*]] = cir.cast(float_complex_to_real, %{{.*}} : !cir.complex<!cir.double>), !cir.double
-// CIR-BEFORE: %[[FP_TO_INT:.*]] = cir.cast(float_to_int, %[[FP_TO_COMPLEX_REAL]] : !cir.double), !s32i
+// CIR-BEFORE: %[[FP_TO_COMPLEX_REAL:.*]] = cir.cast float_complex_to_real %{{.*}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-BEFORE: %[[FP_TO_INT:.*]] = cir.cast float_to_int %[[FP_TO_COMPLEX_REAL]] : !cir.double -> !s32i
 
 //      CIR-AFTER: %[[REAL:.*]] = cir.complex.real %{{.*}} : !cir.complex<!cir.double> -> !cir.double
-// CIR-AFTER-NEXT: %{{.*}} = cir.cast(float_to_int, %[[REAL]] : !cir.double), !s32i
+// CIR-AFTER-NEXT: %{{.*}} = cir.cast float_to_int %[[REAL]] : !cir.double -> !s32i
 
 //      LLVM: %[[REAL:.*]] = extractvalue { double, double } %{{.+}}, 0
 // LLVM-NEXT: %[[REAL_TO_INT:.*]] = fptosi double %[[REAL]] to i32
@@ -223,12 +223,12 @@ void complex_to_bool() {
   b = (bool)ci;
 }
 
-// CIR-BEFORE: %[[FP_COMPLEX_TO_BOOL:.*]] = cir.cast(float_complex_to_bool, %{{.*}} : !cir.complex<!cir.double>), !cir.bool
+// CIR-BEFORE: %[[FP_COMPLEX_TO_BOOL:.*]] = cir.cast float_complex_to_bool %{{.*}} : !cir.complex<!cir.double> -> !cir.bool
 
 //      CIR-AFTER: %[[REAL:.*]] = cir.complex.real %{{.*}} : !cir.complex<!cir.double> -> !cir.double
 // CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.complex.imag %{{.*}} : !cir.complex<!cir.double> -> !cir.double
-// CIR-AFTER-NEXT: %[[REAL_TO_BOOL:.*]] = cir.cast(float_to_bool, %[[REAL]] : !cir.double), !cir.bool
-// CIR-AFTER-NEXT: %[[IMAG_TO_BOOL:.*]] = cir.cast(float_to_bool, %[[IMAG]] : !cir.double), !cir.bool
+// CIR-AFTER-NEXT: %[[REAL_TO_BOOL:.*]] = cir.cast float_to_bool %[[REAL]] : !cir.double -> !cir.bool
+// CIR-AFTER-NEXT: %[[IMAG_TO_BOOL:.*]] = cir.cast float_to_bool %[[IMAG]] : !cir.double -> !cir.bool
 // CIR-AFTER-NEXT: %[[CONST_TRUE:.*]] = cir.const #true
 // CIR-AFTER-NEXT: %{{.*}} = cir.select if %[[REAL_TO_BOOL]] then %[[CONST_TRUE]] else %[[IMAG_TO_BOOL]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
 
@@ -248,12 +248,12 @@ void complex_to_bool() {
 // OGCG: %[[BOOL_TO_INT:.*]] = zext i1 %[[COMPLEX_TO_BOOL]] to i8
 // OGCG: store i8 %[[BOOL_TO_INT]], ptr {{.*}}, align 1
 
-// CIR-BEFORE: %[[INT_COMPLEX_TO_BOOL:.*]] = cir.cast(int_complex_to_bool, %{{.*}} : !cir.complex<!s32i>), !cir.bool
+// CIR-BEFORE: %[[INT_COMPLEX_TO_BOOL:.*]] = cir.cast int_complex_to_bool %{{.*}} : !cir.complex<!s32i> -> !cir.bool
 
 //      CIR-AFTER: %[[REAL:.*]] = cir.complex.real %{{.*}} : !cir.complex<!s32i> -> !s32i
 // CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.complex.imag %{{.*}} : !cir.complex<!s32i> -> !s32i
-// CIR-AFTER-NEXT: %[[REAL_TO_BOOL:.*]] = cir.cast(int_to_bool, %[[REAL]] : !s32i), !cir.bool
-// CIR-AFTER-NEXT: %[[IMAG_TO_BOOL:.*]] = cir.cast(int_to_bool, %[[IMAG]] : !s32i), !cir.bool
+// CIR-AFTER-NEXT: %[[REAL_TO_BOOL:.*]] = cir.cast int_to_bool %[[REAL]] : !s32i -> !cir.bool
+// CIR-AFTER-NEXT: %[[IMAG_TO_BOOL:.*]] = cir.cast int_to_bool %[[IMAG]] : !s32i -> !cir.bool
 // CIR-AFTER-NEXT: %[[CONST_TRUE:.*]] = cir.const #true
 // CIR-AFTER-NEXT: %{{.+}} = cir.select if %[[REAL_TO_BOOL]] then %[[CONST_TRUE]] else %[[IMAG_TO_BOOL]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
 
@@ -279,12 +279,12 @@ void complex_to_complex_cast() {
 }
 
 // CIR-BEFORE: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
-// CIR-BEFORE: %[[FP_COMPLEX:.*]] = cir.cast(float_complex, %[[TMP]] : !cir.complex<!cir.float>), !cir.complex<!cir.double>
+// CIR-BEFORE: %[[FP_COMPLEX:.*]] = cir.cast float_complex %[[TMP]] : !cir.complex<!cir.float> -> !cir.complex<!cir.double>
 
 // CIR-AFTER: %[[REAL:.*]] = cir.complex.real %{{.*}} : !cir.complex<!cir.float> -> !cir.float
 // CIR-AFTER: %[[IMAG:.*]] = cir.complex.imag %{{.*}} : !cir.complex<!cir.float> -> !cir.float
-// CIR-AFTER: %[[REAL_FP_CAST:.*]] = cir.cast(floating, %[[REAL]] : !cir.float), !cir.double
-// CIR-AFTER: %[[IMAG_FP_CAST:.*]] = cir.cast(floating, %[[IMAG]] : !cir.float), !cir.double
+// CIR-AFTER: %[[REAL_FP_CAST:.*]] = cir.cast floating %[[REAL]] : !cir.float -> !cir.double
+// CIR-AFTER: %[[IMAG_FP_CAST:.*]] = cir.cast floating %[[IMAG]] : !cir.float -> !cir.double
 // CIR-AFTER: %{{.*}} = cir.complex.create %[[REAL_FP_CAST]], %[[IMAG_FP_CAST]] : !cir.double -> !cir.complex<!cir.double>
 
 // LLVM: %[[REAL:.*]] = extractvalue { float, float } %{{.*}}, 0
@@ -303,12 +303,12 @@ void complex_to_complex_cast() {
 // OGCG: store double %[[IMAG_FP_CAST]], ptr getelementptr inbounds nuw ({ double, double }, ptr {{.*}}, i32 0, i32 1), align 8
 
 // CIR-BEFORE: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.complex<!s16i>>, !cir.complex<!s16i>
-// CIR-BEFORE: %[[INT_COMPLEX:.*]] = cir.cast(int_complex, %[[TMP]] : !cir.complex<!s16i>), !cir.complex<!s32i>
+// CIR-BEFORE: %[[INT_COMPLEX:.*]] = cir.cast int_complex %[[TMP]] : !cir.complex<!s16i> -> !cir.complex<!s32i>
 
 // CIR-AFTER: %[[REAL:.*]] = cir.complex.real %{{.*}} : !cir.complex<!s16i> -> !s16i
 // CIR-AFTER: %[[IMAG:.*]] = cir.complex.imag %{{.*}} : !cir.complex<!s16i> -> !s16i
-// CIR-AFTER: %[[REAL_INT_CAST:.*]] = cir.cast(integral, %[[REAL]] : !s16i), !s32i
-// CIR-AFTER: %[[IMAG_INT_CAST:.*]] = cir.cast(integral, %[[IMAG]] : !s16i), !s32i
+// CIR-AFTER: %[[REAL_INT_CAST:.*]] = cir.cast integral %[[REAL]] : !s16i -> !s32i
+// CIR-AFTER: %[[IMAG_INT_CAST:.*]] = cir.cast integral %[[IMAG]] : !s16i -> !s32i
 // CIR-AFTER: %{{.*}} = cir.complex.create %[[REAL_INT_CAST]], %[[IMAG_INT_CAST]] : !s32i -> !cir.complex<!s32i>
 
 // LLVM: %[[REAL:.*]] = extractvalue { i16, i16 } %{{.*}}, 0
@@ -336,9 +336,9 @@ void lvalue_to_rvalue_bitcast() {
    double _Complex b = __builtin_bit_cast(double _Complex, a);
 }
 
-// CIR-BEFORE: %{{.*}} = cir.cast(bitcast, %{{.*}} : !cir.ptr<!rec_CX>), !cir.ptr<!cir.complex<!cir.double>>
+// CIR-BEFORE: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.ptr<!rec_CX> -> !cir.ptr<!cir.complex<!cir.double>>
 
-// CIR-AFTER: %{{.*}} = cir.cast(bitcast, %{{.*}} : !cir.ptr<!rec_CX>), !cir.ptr<!cir.complex<!cir.double>>
+// CIR-AFTER: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.ptr<!rec_CX> -> !cir.ptr<!cir.complex<!cir.double>>
 
 // LLVM: %[[PTR_ADDR:.*]] = alloca %struct.CX, i64 1, align 8
 // LLVM: %[[COMPLEX_ADDR:.*]] = alloca { double, double }, i64 1, align 8
@@ -361,9 +361,9 @@ void lvalue_bitcast() {
   (double _Complex &)a = {};
 }
 
-// CIR-BEFORE: %{{.*}} = cir.cast(bitcast, %{{.*}} : !cir.ptr<!rec_CX>), !cir.ptr<!cir.complex<!cir.double>>
+// CIR-BEFORE: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.ptr<!rec_CX> -> !cir.ptr<!cir.complex<!cir.double>>
 
-// CIR-AFTER: %{{.*}} = cir.cast(bitcast, %{{.*}} : !cir.ptr<!rec_CX>), !cir.ptr<!cir.complex<!cir.double>>
+// CIR-AFTER: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.ptr<!rec_CX> -> !cir.ptr<!cir.complex<!cir.double>>
 
 // LLVM: %[[A_ADDR:.*]] = alloca %struct.CX, i64 1, align 8
 // LLVM: store { double, double } zeroinitializer, ptr %[[A_ADDR]], align 8
diff --git a/clang/test/CIR/CodeGen/complex-compound-assignment.cpp b/clang/test/CIR/CodeGen/complex-compound-assignment.cpp
index 9909985e7819c..a5070f51fad63 100644
--- a/clang/test/CIR/CodeGen/complex-compound-assignment.cpp
+++ b/clang/test/CIR/CodeGen/complex-compound-assignment.cpp
@@ -154,20 +154,20 @@ void foo3() {
 // CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
 // CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
 // CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
-// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
-// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[A_REAL_F32:.*]] = cir.cast floating %[[A_REAL]] : !cir.f16 -> !cir.float
+// CIR: %[[A_IMAG_F32:.*]] = cir.cast floating %[[A_IMAG]] : !cir.f16 -> !cir.float
 // CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
 // CIR: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.f16> -> !cir.f16
 // CIR: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.f16> -> !cir.f16
-// CIR: %[[B_REAL_F32:.*]] = cir.cast(floating, %[[B_REAL]] : !cir.f16), !cir.float
-// CIR: %[[B_IMAG_F32:.*]] = cir.cast(floating, %[[B_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[B_REAL_F32:.*]] = cir.cast floating %[[B_REAL]] : !cir.f16 -> !cir.float
+// CIR: %[[B_IMAG_F32:.*]] = cir.cast floating %[[B_IMAG]] : !cir.f16 -> !cir.float
 // CIR: %[[B_COMPLEX_F32:.*]] = cir.complex.create %[[B_REAL_F32]], %[[B_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR: %[[ADD_A_B:.*]] = cir.complex.add %[[B_COMPLEX_F32]], %[[A_COMPLEX_F32]] : !cir.complex<!cir.float>
 // CIR: %[[ADD_REAL:.*]] = cir.complex.real %[[ADD_A_B]] : !cir.complex<!cir.float> -> !cir.float
 // CIR: %[[ADD_IMAG:.*]] = cir.complex.imag %[[ADD_A_B]] : !cir.complex<!cir.float> -> !cir.float
-// CIR: %[[ADD_REAL_F16:.*]] = cir.cast(floating, %[[ADD_REAL]] : !cir.float), !cir.f16
-// CIR: %[[ADD_IMAG_F16:.*]] = cir.cast(floating, %[[ADD_IMAG]] : !cir.float), !cir.f16
+// CIR: %[[ADD_REAL_F16:.*]] = cir.cast floating %[[ADD_REAL]] : !cir.float -> !cir.f16
+// CIR: %[[ADD_IMAG_F16:.*]] = cir.cast floating %[[ADD_IMAG]] : !cir.float -> !cir.f16
 // CIR: %[[RESULT:.*]] = cir.complex.create %[[ADD_REAL_F16]], %[[ADD_IMAG_F16]] : !cir.f16 -> !cir.complex<!cir.f16>
 // CIR: cir.store{{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>
 
@@ -712,14 +712,14 @@ void foo13() {
 // CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
 // CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
 // CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
-// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
-// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[A_REAL_F32:.*]] = cir.cast floating %[[A_REAL]] : !cir.f16 -> !cir.float
+// CIR: %[[A_IMAG_F32:.*]] = cir.cast floating %[[A_IMAG]] : !cir.f16 -> !cir.float
 // CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
 // CIR: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.f16> -> !cir.f16
 // CIR: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.f16> -> !cir.f16
-// CIR: %[[B_REAL_F32:.*]] = cir.cast(floating, %[[B_REAL]] : !cir.f16), !cir.float
-// CIR: %[[B_IMAG_F32:.*]] = cir.cast(floating, %[[B_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[B_REAL_F32:.*]] = cir.cast floating %[[B_REAL]] : !cir.f16 -> !cir.float
+// CIR: %[[B_IMAG_F32:.*]] = cir.cast floating %[[B_IMAG]] : !cir.f16 -> !cir.float
 // CIR: %[[B_COMPLEX_F32:.*]] = cir.complex.create %[[B_REAL_F32]], %[[B_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR: %[[A_REAL_F32:.*]] = cir.complex.real %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
 // CIR: %[[A_IMAG_F32:.*]] = cir.complex.imag %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
@@ -729,8 +729,8 @@ void foo13() {
 // CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
 // CIR: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.f16> -> !cir.f16
 // CIR: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.f16> -> !cir.f16
-// CIR: %[[B_REAL_F32:.*]] = cir.cast(floating, %[[B_REAL]] : !cir.f16), !cir.float
-// CIR: %[[B_IMAG_F32:.*]] = cir.cast(floating, %[[B_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[B_REAL_F32:.*]] = cir.cast floating %[[B_REAL]] : !cir.f16 -> !cir.float
+// CIR: %[[B_IMAG_F32:.*]] = cir.cast floating %[[B_IMAG]] : !cir.f16 -> !cir.float
 // CIR: %[[B_COMPLEX_F32:.*]] = cir.complex.create %[[B_REAL_F32]], %[[B_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR: %[[B_REAL_F32:.*]] = cir.complex.real %[[B_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
 // CIR: %[[B_IMAG_F32:.*]] = cir.complex.imag %[[B_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
@@ -739,8 +739,8 @@ void foo13() {
 // CIR: %[[RESULT:.*]] = cir.call @__divsc3(%[[B_REAL_F32]], %[[B_IMAG_F32]], %[[DIV_AB_REAL]], %[[DIV_AB_IMAG]]) : (!cir.float, !cir.float, !cir.float, !cir.float) -> !cir.complex<!cir.float>
 // CIR: %[[RESULT_REAL_F32:.*]] = cir.complex.real %[[RESULT]] : !cir.complex<!cir.float> -> !cir.float
 // CIR: %[[RESULT_IMAG_F32:.*]] = cir.complex.imag %[[RESULT]] : !cir.complex<!cir.float> -> !cir.float
-// CIR: %[[RESULT_REAL_F16:.*]] = cir.cast(floating, %[[RESULT_REAL_F32]] : !cir.float), !cir.f16
-// CIR: %[[RESULT_IMAG_F16:.*]] = cir.cast(floating, %[[RESULT_IMAG_F32]] : !cir.float), !cir.f16
+// CIR: %[[RESULT_REAL_F16:.*]] = cir.cast floating %[[RESULT_REAL_F32]] : !cir.float -> !cir.f16
+// CIR: %[[RESULT_IMAG_F16:.*]] = cir.cast floating %[[RESULT_IMAG_F32]] : !cir.float -> !cir.f16
 // CIR: %[[RESULT_COMPLEX_F16:.*]] = cir.complex.create %[[RESULT_REAL_F16]], %[[RESULT_IMAG_F16]] : !cir.f16 -> !cir.complex<!cir.f16>
 // CIR: cir.store{{.*}} %[[RESULT_COMPLEX_F16]], %[[B_ADDR]] : !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>
 
diff --git a/clang/test/CIR/CodeGen/complex-mul-div.cpp b/clang/test/CIR/CodeGen/complex-mul-div.cpp
index d49304660b4d4..b306981434dc6 100644
--- a/clang/test/CIR/CodeGen/complex-mul-div.cpp
+++ b/clang/test/CIR/CodeGen/complex-mul-div.cpp
@@ -549,10 +549,10 @@ void foo3() {
 // CIR-AFTER-PROMOTED: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
 // CIR-AFTER-PROMOTED: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float
 // CIR-AFTER-PROMOTED: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float
-// CIR-AFTER-PROMOTED: %[[A_REAL_F64:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.float), !cir.double
-// CIR-AFTER-PROMOTED: %[[A_IMAG_F64:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.float), !cir.double
-// CIR-AFTER-PROMOTED: %[[B_REAL_F64:.*]] = cir.cast(floating, %[[B_REAL]] : !cir.float), !cir.double
-// CIR-AFTER-PROMOTED: %[[B_IMAG_F64:.*]] = cir.cast(floating, %[[B_IMAG]] : !cir.float), !cir.double
+// CIR-AFTER-PROMOTED: %[[A_REAL_F64:.*]] = cir.cast floating %[[A_REAL]] : !cir.float -> !cir.double
+// CIR-AFTER-PROMOTED: %[[A_IMAG_F64:.*]] = cir.cast floating %[[A_IMAG]] : !cir.float -> !cir.double
+// CIR-AFTER-PROMOTED: %[[B_REAL_F64:.*]] = cir.cast floating %[[B_REAL]] : !cir.float -> !cir.double
+// CIR-AFTER-PROMOTED: %[[B_IMAG_F64:.*]] = cir.cast floating %[[B_IMAG]] : !cir.float -> !cir.double
 // CIR-AFTER-PROMOTED: %[[MUL_AR_BR:.*]] = cir.binop(mul, %[[A_REAL_F64]], %[[B_REAL_F64]]) : !cir.double
 // CIR-AFTER-PROMOTED: %[[MUL_AI_BI:.*]] = cir.binop(mul, %[[A_IMAG_F64]], %[[B_IMAG_F64]]) : !cir.double
 // CIR-AFTER-PROMOTED: %[[MUL_BR_BR:.*]] = cir.binop(mul, %[[B_REAL_F64]], %[[B_REAL_F64]]) : !cir.double
@@ -567,8 +567,8 @@ void foo3() {
 // CIR-AFTER-PROMOTED: %[[RESULT_F64:.*]] = cir.complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : !cir.double -> !cir.complex<!cir.double>
 // CIR-AFTER-PROMOTED: %[[RESULT_REAL_F64:.*]] = cir.complex.real %[[RESULT_F64]] : !cir.complex<!cir.double> -> !cir.double
 // CIR-AFTER-PROMOTED: %[[RESULT_IMAG_F64:.*]] = cir.complex.imag %[[RESULT_F64]] : !cir.complex<!cir.double> -> !cir.double
-// CIR-AFTER-PROMOTED: %[[RESULT_REAL_F32:.*]] = cir.cast(floating, %[[RESULT_REAL_F64]] : !cir.double), !cir.float
-// CIR-AFTER-PROMOTED: %[[RESULT_IMAG_F32:.*]] = cir.cast(floating, %[[RESULT_IMAG_F64]] : !cir.double), !cir.float
+// CIR-AFTER-PROMOTED: %[[RESULT_REAL_F32:.*]] = cir.cast floating %[[RESULT_REAL_F64]] : !cir.double -> !cir.float
+// CIR-AFTER-PROMOTED: %[[RESULT_IMAG_F32:.*]] = cir.cast floating %[[RESULT_IMAG_F64]] : !cir.double -> !cir.float
 // CIR-AFTER-PROMOTED: %[[RESULT_F32:.*]] = cir.complex.create %[[RESULT_REAL_F32]], %[[RESULT_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR-AFTER-PROMOTED: cir.store{{.*}} %[[RESULT_F32]], %[[C_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
 
@@ -1044,10 +1044,10 @@ void foo6() {
 // CIR-AFTER-PROMOTED: %[[A_IMAG:.*]] = cir.complex.imag %[[COMPLEX_A]] : !cir.complex<!cir.float> -> !cir.float
 // CIR-AFTER-PROMOTED: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float
 // CIR-AFTER-PROMOTED: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float
-// CIR-AFTER-PROMOTED: %[[A_REAL_F64:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.float), !cir.double
-// CIR-AFTER-PROMOTED: %[[A_IMAG_F64:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.float), !cir.double
-// CIR-AFTER-PROMOTED: %[[B_REAL_F64:.*]] = cir.cast(floating, %[[B_REAL]] : !cir.float), !cir.double
-// CIR-AFTER-PROMOTED: %[[B_IMAG_F64:.*]] = cir.cast(floating, %[[B_IMAG]] : !cir.float), !cir.double
+// CIR-AFTER-PROMOTED: %[[A_REAL_F64:.*]] = cir.cast floating %[[A_REAL]] : !cir.float -> !cir.double
+// CIR-AFTER-PROMOTED: %[[A_IMAG_F64:.*]] = cir.cast floating %[[A_IMAG]] : !cir.float -> !cir.double
+// CIR-AFTER-PROMOTED: %[[B_REAL_F64:.*]] = cir.cast floating %[[B_REAL]] : !cir.float -> !cir.double
+// CIR-AFTER-PROMOTED: %[[B_IMAG_F64:.*]] = cir.cast floating %[[B_IMAG]] : !cir.float -> !cir.double
 // CIR-AFTER-PROMOTED: %[[MUL_AR_BR:.*]] = cir.binop(mul, %[[A_REAL_F64]], %[[B_REAL_F64]]) : !cir.double
 // CIR-AFTER-PROMOTED: %[[MUL_AI_BI:.*]] = cir.binop(mul, %[[A_IMAG_F64]], %[[B_IMAG_F64]]) : !cir.double
 // CIR-AFTER-PROMOTED: %[[MUL_BR_BR:.*]] = cir.binop(mul, %[[B_REAL_F64]], %[[B_REAL_F64]]) : !cir.double
@@ -1062,8 +1062,8 @@ void foo6() {
 // CIR-AFTER-PROMOTED: %[[RESULT_F64:.*]] = cir.complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : !cir.double -> !cir.complex<!cir.double>
 // CIR-AFTER-PROMOTED: %[[RESULT_REAL_F64:.*]] = cir.complex.real %[[RESULT_F64]] : !cir.complex<!cir.double> -> !cir.double
 // CIR-AFTER-PROMOTED: %[[RESULT_IMAG_F64:.*]] = cir.complex.imag %[[RESULT_F64]] : !cir.complex<!cir.double> -> !cir.double
-// CIR-AFTER-PROMOTED: %[[RESULT_REAL_F32:.*]] = cir.cast(floating, %[[RESULT_REAL_F64]] : !cir.double), !cir.float
-// CIR-AFTER-PROMOTED: %[[RESULT_IMAG_F32:.*]] = cir.cast(floating, %[[RESULT_IMAG_F64]] : !cir.double), !cir.float
+// CIR-AFTER-PROMOTED: %[[RESULT_REAL_F32:.*]] = cir.cast floating %[[RESULT_REAL_F64]] : !cir.double -> !cir.float
+// CIR-AFTER-PROMOTED: %[[RESULT_IMAG_F32:.*]] = cir.cast floating %[[RESULT_IMAG_F64]] : !cir.double -> !cir.float
 // CIR-AFTER-PROMOTED: %[[RESULT_F32:.*]] = cir.complex.create %[[RESULT_REAL_F32]], %[[RESULT_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR-AFTER-PROMOTED: cir.store{{.*}} %[[RESULT_F32]], %[[C_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
 
diff --git a/clang/test/CIR/CodeGen/complex-unary.cpp b/clang/test/CIR/CodeGen/complex-unary.cpp
index d79199f23bbfd..a8e434b903763 100644
--- a/clang/test/CIR/CodeGen/complex-unary.cpp
+++ b/clang/test/CIR/CodeGen/complex-unary.cpp
@@ -380,9 +380,9 @@ void foo9() {
 // CIR-BEFORE: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"]
 // CIR-BEFORE: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["b", init]
 // CIR-BEFORE: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
-// CIR-BEFORE: %[[A_COMPLEX_F32:.*]] = cir.cast(float_complex, %[[TMP_A]] : !cir.complex<!cir.f16>), !cir.complex<!cir.float>
+// CIR-BEFORE: %[[A_COMPLEX_F32:.*]] = cir.cast float_complex %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.complex<!cir.float>
 // CIR-BEFORE: %[[RESULT:.*]] = cir.unary(plus, %[[A_COMPLEX_F32]]) : !cir.complex<!cir.float>, !cir.complex<!cir.float>
-// CIR-BEFORE: %[[A_COMPLEX_F16:.*]] = cir.cast(float_complex, %[[RESULT]] : !cir.complex<!cir.float>), !cir.complex<!cir.f16>
+// CIR-BEFORE: %[[A_COMPLEX_F16:.*]] = cir.cast float_complex %[[RESULT]] : !cir.complex<!cir.float> -> !cir.complex<!cir.f16>
 // CIR-BEFORE: cir.store{{.*}} %[[A_COMPLEX_F16]], %[[B_ADDR]] : !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>
 
 // CIR-AFTER: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"]
@@ -390,8 +390,8 @@ void foo9() {
 // CIR-AFTER: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
 // CIR-AFTER: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
 // CIR-AFTER: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
-// CIR-AFTER: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
-// CIR-AFTER: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
+// CIR-AFTER: %[[A_REAL_F32:.*]] = cir.cast floating %[[A_REAL]] : !cir.f16 -> !cir.float
+// CIR-AFTER: %[[A_IMAG_F32:.*]] = cir.cast floating %[[A_IMAG]] : !cir.f16 -> !cir.float
 // CIR-AFTER: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR-AFTER: %[[A_REAL_F32:.*]] = cir.complex.real %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
 // CIR-AFTER: %[[A_IMAG_F32:.*]] = cir.complex.imag %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
@@ -400,8 +400,8 @@ void foo9() {
 // CIR-AFTER: %[[RESULT_COMPLEX_F32:.*]] = cir.complex.create %[[RESULT_REAL_F32]], %[[RESULT_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR-AFTER: %[[RESULT_REAL_F32:.*]] = cir.complex.real %[[RESULT_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
 // CIR-AFTER: %[[RESULT_IMAG_F32:.*]] = cir.complex.imag %[[RESULT_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
-// CIR-AFTER: %[[RESULT_REAL_F16:.*]] = cir.cast(floating, %[[RESULT_REAL_F32]] : !cir.float), !cir.f16
-// CIR-AFTER: %[[RESULT_IMAG_F16:.*]] = cir.cast(floating, %[[RESULT_IMAG_F32]] : !cir.float), !cir.f16
+// CIR-AFTER: %[[RESULT_REAL_F16:.*]] = cir.cast floating %[[RESULT_REAL_F32]] : !cir.float -> !cir.f16
+// CIR-AFTER: %[[RESULT_IMAG_F16:.*]] = cir.cast floating %[[RESULT_IMAG_F32]] : !cir.float -> !cir.f16
 // CIR-AFTER: %[[RESULT_COMPLEX_F16:.*]] = cir.complex.create %[[RESULT_REAL_F16]], %[[RESULT_IMAG_F16]] : !cir.f16 -> !cir.complex<!cir.f16>
 // CIR-AFTER: cir.store{{.*}} %[[RESULT_COMPLEX_F16]], %[[B_ADDR]] : !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>
 
@@ -445,9 +445,9 @@ void foo10() {
 // CIR-BEFORE: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"]
 // CIR-BEFORE: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["b", init]
 // CIR-BEFORE: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
-// CIR-BEFORE: %[[A_COMPLEX_F32:.*]] = cir.cast(float_complex, %[[TMP_A]] : !cir.complex<!cir.f16>), !cir.complex<!cir.float>
+// CIR-BEFORE: %[[A_COMPLEX_F32:.*]] = cir.cast float_complex %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.complex<!cir.float>
 // CIR-BEFORE: %[[RESULT:.*]] = cir.unary(minus, %[[A_COMPLEX_F32]]) : !cir.complex<!cir.float>, !cir.complex<!cir.float>
-// CIR-BEFORE: %[[A_COMPLEX_F16:.*]] = cir.cast(float_complex, %[[RESULT]] : !cir.complex<!cir.float>), !cir.complex<!cir.f16>
+// CIR-BEFORE: %[[A_COMPLEX_F16:.*]] = cir.cast float_complex %[[RESULT]] : !cir.complex<!cir.float> -> !cir.complex<!cir.f16>
 // CIR-BEFORE: cir.store{{.*}} %[[A_COMPLEX_F16]], %[[B_ADDR]] : !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>
 
 // CIR-AFTER: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"]
@@ -455,8 +455,8 @@ void foo10() {
 // CIR-AFTER: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
 // CIR-AFTER: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
 // CIR-AFTER: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
-// CIR-AFTER: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
-// CIR-AFTER: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
+// CIR-AFTER: %[[A_REAL_F32:.*]] = cir.cast floating %[[A_REAL]] : !cir.f16 -> !cir.float
+// CIR-AFTER: %[[A_IMAG_F32:.*]] = cir.cast floating %[[A_IMAG]] : !cir.f16 -> !cir.float
 // CIR-AFTER: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR-AFTER: %[[A_REAL_F32:.*]] = cir.complex.real %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
 // CIR-AFTER: %[[A_IMAG_F32:.*]] = cir.complex.imag %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
@@ -465,8 +465,8 @@ void foo10() {
 // CIR-AFTER: %[[RESULT_COMPLEX_F32:.*]] = cir.complex.create %[[RESULT_REAL_F32]], %[[RESULT_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR-AFTER: %[[RESULT_REAL_F32:.*]] = cir.complex.real %[[RESULT_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
 // CIR-AFTER: %[[RESULT_IMAG_F32:.*]] = cir.complex.imag %[[RESULT_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
-// CIR-AFTER: %[[RESULT_REAL_F16:.*]] = cir.cast(floating, %[[RESULT_REAL_F32]] : !cir.float), !cir.f16
-// CIR-AFTER: %[[RESULT_IMAG_F16:.*]] = cir.cast(floating, %[[RESULT_IMAG_F32]] : !cir.float), !cir.f16
+// CIR-AFTER: %[[RESULT_REAL_F16:.*]] = cir.cast floating %[[RESULT_REAL_F32]] : !cir.float -> !cir.f16
+// CIR-AFTER: %[[RESULT_IMAG_F16:.*]] = cir.cast floating %[[RESULT_IMAG_F32]] : !cir.float -> !cir.f16
 // CIR-AFTER: %[[RESULT_COMPLEX_F16:.*]] = cir.complex.create %[[RESULT_REAL_F16]], %[[RESULT_IMAG_F16]] : !cir.f16 -> !cir.complex<!cir.f16>
 // CIR-AFTER: cir.store{{.*}} %[[RESULT_COMPLEX_F16]], %[[B_ADDR]] : !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>
 
diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp
index 4c396d312d148..2d58c380c844a 100644
--- a/clang/test/CIR/CodeGen/complex.cpp
+++ b/clang/test/CIR/CodeGen/complex.cpp
@@ -612,7 +612,7 @@ void foo24() {
 // CIR: %[[ARR:.*]] = cir.alloca !cir.array<!cir.complex<!s32i> x 2>, !cir.ptr<!cir.array<!cir.complex<!s32i> x 2>>, ["arr"]
 // CIR: %[[RESULT:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["r", init]
 // CIR: %[[IDX:.*]] = cir.const #cir.int<1> : !s32i
-// CIR: %[[ARR_PTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!cir.complex<!s32i> x 2>>), !cir.ptr<!cir.complex<!s32i>>
+// CIR: %[[ARR_PTR:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!cir.complex<!s32i> x 2>> -> !cir.ptr<!cir.complex<!s32i>>
 // CIR: %[[RESULT_VAL:.*]] = cir.ptr_stride(%[[ARR_PTR]] : !cir.ptr<!cir.complex<!s32i>>, %[[IDX]] : !s32i), !cir.ptr<!cir.complex<!s32i>>
 // CIR: %[[TMP:.*]] = cir.load{{.*}} %[[RESULT_VAL]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
 // CIR: cir.store{{.*}} %[[TMP]], %[[RESULT]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
@@ -938,11 +938,11 @@ void foo35() {
 // CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
 // CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
 // CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
-// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
-// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[A_REAL_F32:.*]] = cir.cast floating %[[A_REAL]] : !cir.f16 -> !cir.float
+// CIR: %[[A_IMAG_F32:.*]] = cir.cast floating %[[A_IMAG]] : !cir.f16 -> !cir.float
 // CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR: %[[A_REAL_F32:.*]] = cir.complex.real %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
-// CIR: %[[A_REAL_F16:.*]] = cir.cast(floating, %[[A_REAL_F32]] : !cir.float), !cir.f16
+// CIR: %[[A_REAL_F16:.*]] = cir.cast floating %[[A_REAL_F32]] : !cir.float -> !cir.f16
 // CIR: cir.store{{.*}} %[[A_REAL_F16]], %[[REAL_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
 
 // LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2
@@ -975,11 +975,11 @@ void foo36() {
 // CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
 // CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
 // CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
-// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
-// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[A_REAL_F32:.*]] = cir.cast floating %[[A_REAL]] : !cir.f16 -> !cir.float
+// CIR: %[[A_IMAG_F32:.*]] = cir.cast floating %[[A_IMAG]] : !cir.f16 -> !cir.float
 // CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR: %[[A_IMAG_F32:.*]] = cir.complex.imag %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
-// CIR: %[[A_IMAG_F16:.*]] = cir.cast(floating, %[[A_IMAG_F32]] : !cir.float), !cir.f16
+// CIR: %[[A_IMAG_F16:.*]] = cir.cast floating %[[A_IMAG_F32]] : !cir.float -> !cir.f16
 // CIR: cir.store{{.*}} %[[A_IMAG_F16]], %[[IMAG_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
 
 // LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2
@@ -1102,11 +1102,11 @@ void atomic_complex_type() {
 // CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
 // CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b", init]
 // CIR: %[[ATOMIC_TMP_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["atomic-temp"]
-// CIR: %[[A_PTR:.*]] = cir.cast(bitcast, %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>), !cir.ptr<!u64i>
-// CIR: %[[ATOMIC_TMP_PTR:.*]] = cir.cast(bitcast, %[[ATOMIC_TMP_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>), !cir.ptr<!u64i>
+// CIR: %[[A_PTR:.*]] = cir.cast bitcast %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>> -> !cir.ptr<!u64i>
+// CIR: %[[ATOMIC_TMP_PTR:.*]] = cir.cast bitcast %[[ATOMIC_TMP_ADDR]] : !cir.ptr<!cir.complex<!cir.float>> -> !cir.ptr<!u64i>
 // CIR: %[[TMP_A_ATOMIC:.*]] = cir.load{{.*}} atomic(relaxed) %[[A_PTR]] : !cir.ptr<!u64i>, !u64i
 // CIR: cir.store{{.*}} %[[TMP_A_ATOMIC]], %[[ATOMIC_TMP_PTR]] : !u64i, !cir.ptr<!u64i>
-// CIR: %[[TMP_ATOMIC_PTR:.*]] = cir.cast(bitcast, %[[ATOMIC_TMP_PTR]] : !cir.ptr<!u64i>), !cir.ptr<!cir.complex<!cir.float>>
+// CIR: %[[TMP_ATOMIC_PTR:.*]] = cir.cast bitcast %[[ATOMIC_TMP_PTR]] : !cir.ptr<!u64i> -> !cir.ptr<!cir.complex<!cir.float>>
 // CIR: %[[TMP_ATOMIC:.*]] = cir.load{{.*}} %[[TMP_ATOMIC_PTR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
 // CIR: cir.store{{.*}} %[[TMP_ATOMIC]], %[[B_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
 
@@ -1178,8 +1178,8 @@ void real_on_scalar_with_type_promotion() {
 // CIR: %[[A_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["a"]
 // CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init]
 // CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.f16>, !cir.f16
-// CIR: %[[TMP_A_F32:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.f16), !cir.float
-// CIR: %[[TMP_A_F16:.*]] = cir.cast(floating, %[[TMP_A_F32]] : !cir.float), !cir.f16
+// CIR: %[[TMP_A_F32:.*]] = cir.cast floating %[[TMP_A]] : !cir.f16 -> !cir.float
+// CIR: %[[TMP_A_F16:.*]] = cir.cast floating %[[TMP_A_F32]] : !cir.float -> !cir.f16
 // CIR: cir.store{{.*}} %[[TMP_A_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
 
 // LLVM: %[[A_ADDR:.*]] = alloca half, i64 1, align 2
@@ -1204,7 +1204,7 @@ void imag_on_scalar_with_type_promotion() {
 // CIR: %[[A_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["a"]
 // CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init]
 // CIR: %[[CONST_ZERO:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.float
-// CIR: %[[CONST_ZERO_F16:.*]] = cir.cast(floating, %[[CONST_ZERO]] : !cir.float), !cir.f16
+// CIR: %[[CONST_ZERO_F16:.*]] = cir.cast floating %[[CONST_ZERO]] : !cir.float -> !cir.f16
 // CIR: cir.store{{.*}} %[[CONST_ZERO_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
 
 // LLVM: %[[A_ADDR:.*]] = alloca half, i64 1, align 2
@@ -1244,11 +1244,11 @@ void real_on_scalar_from_real_with_type_promotion() {
 // CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
 // CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
 // CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
-// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
-// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[A_REAL_F32:.*]] = cir.cast floating %[[A_REAL]] : !cir.f16 -> !cir.float
+// CIR: %[[A_IMAG_F32:.*]] = cir.cast floating %[[A_IMAG]] : !cir.f16 -> !cir.float
 // CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR: %[[A_REAL_F32:.*]] = cir.complex.real %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
-// CIR: %[[A_REAL_F16:.*]] = cir.cast(floating, %[[A_REAL_F32]] : !cir.float), !cir.f16
+// CIR: %[[A_REAL_F16:.*]] = cir.cast floating %[[A_REAL_F32]] : !cir.float -> !cir.f16
 // CIR: cir.store{{.*}} %[[A_REAL_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
 
 // LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2
@@ -1281,11 +1281,11 @@ void real_on_scalar_from_imag_with_type_promotion() {
 // CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
 // CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
 // CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
-// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
-// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[A_REAL_F32:.*]] = cir.cast floating %[[A_REAL]] : !cir.f16 -> !cir.float
+// CIR: %[[A_IMAG_F32:.*]] = cir.cast floating %[[A_IMAG]] : !cir.f16 -> !cir.float
 // CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
 // CIR: %[[A_IMAG_F32:.*]] = cir.complex.imag %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
-// CIR: %[[A_IMAG_F16:.*]] = cir.cast(floating, %[[A_IMAG_F32]] : !cir.float), !cir.f16
+// CIR: %[[A_IMAG_F16:.*]] = cir.cast floating %[[A_IMAG_F32]] : !cir.float -> !cir.f16
 // CIR: cir.store{{.*}} %[[A_IMAG_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
 
 // LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2
diff --git a/clang/test/CIR/CodeGen/cxx-default-init.cpp b/clang/test/CIR/CodeGen/cxx-default-init.cpp
index 06d3a27f61cc9..b3d706ffa831f 100644
--- a/clang/test/CIR/CodeGen/cxx-default-init.cpp
+++ b/clang/test/CIR/CodeGen/cxx-default-init.cpp
@@ -33,7 +33,7 @@ struct ZeroInit {
 // CIR:   %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
 // CIR:   cir.store{{.*}} %[[ZERO]], %[[P_B]]
 // CIR:   %[[ARR:.*]] = cir.get_member %[[THIS]][2] {name = "arr"}
-// CIR:   %[[ARR_BEGIN:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!s32i x 4>>), !cir.ptr<!s32i>
+// CIR:   %[[ARR_BEGIN:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!s32i x 4>> -> !cir.ptr<!s32i>
 // CIR:   cir.store{{.*}} %[[ARR_BEGIN]], %[[ITER]]
 // CIR:   %[[FOUR:.*]] = cir.const #cir.int<4> : !s64i
 // CIR:   %[[END:.*]] = cir.ptr_stride(%[[ARR_BEGIN]] : !cir.ptr<!s32i>, %[[FOUR]] : !s64i)
@@ -139,7 +139,7 @@ struct ValueInit {
 // CIR:   %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
 // CIR:   cir.store{{.*}} %[[THREE]], %[[P_B]]
 // CIR:   %[[ARR:.*]] = cir.get_member %[[THIS]][2] {name = "arr"}
-// CIR:   %[[ARR_BEGIN:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!s32i x 4>>), !cir.ptr<!s32i>
+// CIR:   %[[ARR_BEGIN:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!s32i x 4>> -> !cir.ptr<!s32i>
 // CIR:   %[[FOUR:.*]] = cir.const #cir.int<4> : !s32i
 // CIR:   cir.store{{.*}} %[[FOUR]], %[[ARR_BEGIN]]
 // CIR:   %[[ONE:.*]] = cir.const #cir.int<1> : !s64i
@@ -169,7 +169,7 @@ struct ValueInit {
 // CIR:   cir.store{{.*}} %[[FOUR_FIVEI]], %[[C]]
 // CIR:   %[[BF:.*]] = cir.get_member %[[THIS]][4] {name = "bf"}
 // CIR:   %[[FF:.*]] = cir.const #cir.int<255> : !s32i
-// CIR:   %[[FF_CAST:.*]] = cir.cast(integral, %[[FF]] : !s32i), !u32i
+// CIR:   %[[FF_CAST:.*]] = cir.cast integral %[[FF]] : !s32i -> !u32i
 // CIR:   %[[BF_VAL:.*]] = cir.set_bitfield{{.*}} (#bfi_bf, %[[BF]] : !cir.ptr<!u8i>, %[[FF_CAST]] : !u32i)
 
 // LLVM: define{{.*}} void @_ZN9ValueInitC2Ev(ptr %[[THIS_ARG:.*]])
diff --git a/clang/test/CIR/CodeGen/delegating-ctor.cpp b/clang/test/CIR/CodeGen/delegating-ctor.cpp
index 73ee6b719940a..c95ecf44dcb10 100644
--- a/clang/test/CIR/CodeGen/delegating-ctor.cpp
+++ b/clang/test/CIR/CodeGen/delegating-ctor.cpp
@@ -116,23 +116,23 @@ Derived::Derived(const void *inVoid) { squawk(); }
 // CIR:        %[[THIS:.*]] = cir.load %[[THIS_ADDR]]
 // CIR:        %[[VTT:.*]] = cir.load{{.*}} %[[VTT_ADDR]]
 // CIR:        %[[VPTR_GLOBAL_ADDR:.*]] = cir.vtt.address_point %[[VTT]] : !cir.ptr<!cir.ptr<!void>>, offset = 0 -> !cir.ptr<!cir.ptr<!void>>
-// CIR:        %[[VPTR_PTR:.*]] = cir.cast(bitcast, %[[VPTR_GLOBAL_ADDR]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!cir.vptr>
+// CIR:        %[[VPTR_PTR:.*]] = cir.cast bitcast %[[VPTR_GLOBAL_ADDR]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
 // CIR:        %[[VPTR:.*]] = cir.load{{.*}} %[[VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
 // CIR:        %[[VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[THIS]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!cir.vptr>
 // CIR:        cir.store{{.*}} %[[VPTR]], %[[VPTR_ADDR]] : !cir.vptr, !cir.ptr<!cir.vptr>
 // CIR:        %[[VPTR_BASE_ADDR:.*]] = cir.vtt.address_point %[[VTT]] : !cir.ptr<!cir.ptr<!void>>, offset = 1 -> !cir.ptr<!cir.ptr<!void>>
-// CIR:        %[[VPTR_BASE_PTR:.*]] = cir.cast(bitcast, %[[VPTR_BASE_ADDR]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!cir.vptr>
+// CIR:        %[[VPTR_BASE_PTR:.*]] = cir.cast bitcast %[[VPTR_BASE_ADDR]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
 // CIR:        %[[VPTR_BASE:.*]] = cir.load{{.*}} %[[VPTR_BASE_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
 // CIR:        %[[VPTR_DERIVED_ADDR:.*]] = cir.vtable.get_vptr %[[THIS]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!cir.vptr>
 // CIR:        %[[VPTR_DERIVED:.*]] = cir.load{{.*}} %[[VPTR_DERIVED_ADDR]] : !cir.ptr<!cir.vptr>, !cir.vptr
-// CIR:        %[[VPTR_DERIVED_AS_I8PTR:.*]] = cir.cast(bitcast, %[[VPTR_DERIVED]] : !cir.vptr), !cir.ptr<!u8i>
+// CIR:        %[[VPTR_DERIVED_AS_I8PTR:.*]] = cir.cast bitcast %[[VPTR_DERIVED]] : !cir.vptr -> !cir.ptr<!u8i>
 // CIR:        %[[BASE_LOC_OFFSET:.*]] = cir.const #cir.int<-32> : !s64i
 // CIR:        %[[BASE_OFFSET_PTR:.*]] = cir.ptr_stride(%[[VPTR_DERIVED_AS_I8PTR]] : !cir.ptr<!u8i>, %[[BASE_LOC_OFFSET]] : !s64i), !cir.ptr<!u8i>
-// CIR:        %[[BASE_OFFSET_I64PTR:.*]] = cir.cast(bitcast, %[[BASE_OFFSET_PTR]] : !cir.ptr<!u8i>), !cir.ptr<!s64i>
+// CIR:        %[[BASE_OFFSET_I64PTR:.*]] = cir.cast bitcast %[[BASE_OFFSET_PTR]] : !cir.ptr<!u8i> -> !cir.ptr<!s64i>
 // CIR:        %[[BASE_OFFSET:.*]] = cir.load{{.*}} %[[BASE_OFFSET_I64PTR]] : !cir.ptr<!s64i>, !s64i
-// CIR:        %[[THIS_AS_I8PTR:.*]] = cir.cast(bitcast, %[[THIS]] : !cir.ptr<!rec_Derived>), !cir.ptr<!u8i>
+// CIR:        %[[THIS_AS_I8PTR:.*]] = cir.cast bitcast %[[THIS]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!u8i>
 // CIR:        %[[BASE_PTR:.*]] = cir.ptr_stride(%[[THIS_AS_I8PTR]] : !cir.ptr<!u8i>, %[[BASE_OFFSET]] : !s64i), !cir.ptr<!u8i>
-// CIR:        %[[BASE_AS_I8PTR:.*]] = cir.cast(bitcast, %[[BASE_PTR]] : !cir.ptr<!u8i>), !cir.ptr<!rec_Derived>
+// CIR:        %[[BASE_AS_I8PTR:.*]] = cir.cast bitcast %[[BASE_PTR]] : !cir.ptr<!u8i> -> !cir.ptr<!rec_Derived>
 // CIR:        %[[BASE_VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[BASE_AS_I8PTR]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!cir.vptr>
 // CIR:        cir.store{{.*}} %[[VPTR_BASE]], %[[BASE_VPTR_ADDR]] : !cir.vptr, !cir.ptr<!cir.vptr>
 // CIR:        %[[VPTR_BASE_ADDR:.*]] = cir.vtable.get_vptr %[[THIS]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!cir.vptr>
diff --git a/clang/test/CIR/CodeGen/delete.cpp b/clang/test/CIR/CodeGen/delete.cpp
index f21d203f266e5..69640aa04531f 100644
--- a/clang/test/CIR/CodeGen/delete.cpp
+++ b/clang/test/CIR/CodeGen/delete.cpp
@@ -21,7 +21,7 @@ void test_sized_delete(SizedDelete *x) {
 
 // CIR: cir.func dso_local @_Z17test_sized_deleteP11SizedDelete
 // CIR:   %[[X:.*]] = cir.load{{.*}} %{{.*}}
-// CIR:   %[[X_CAST:.*]] = cir.cast(bitcast, %[[X]] : !cir.ptr<!rec_SizedDelete>), !cir.ptr<!void>
+// CIR:   %[[X_CAST:.*]] = cir.cast bitcast %[[X]] : !cir.ptr<!rec_SizedDelete> -> !cir.ptr<!void>
 // CIR:   %[[OBJ_SIZE:.*]] = cir.const #cir.int<4> : !u64i
 // CIR:   cir.call @_ZN11SizedDeletedlEPvm(%[[X_CAST]], %[[OBJ_SIZE]]) nothrow : (!cir.ptr<!void>, !u64i) -> ()
 
@@ -62,7 +62,7 @@ Container::~Container() { delete contents; }
 // CIR:   %[[CONTENTS_PTR_ADDR:.*]] = cir.get_member %[[THIS]][0] {name = "contents"} : !cir.ptr<!rec_Container> -> !cir.ptr<!cir.ptr<!rec_Contents>>
 // CIR:   %[[CONTENTS_PTR:.*]] = cir.load{{.*}} %[[CONTENTS_PTR_ADDR]]
 // CIR:   cir.call @_ZN8ContentsD2Ev(%[[CONTENTS_PTR]]) nothrow : (!cir.ptr<!rec_Contents>) -> ()
-// CIR:   %[[CONTENTS_CAST:.*]] = cir.cast(bitcast, %[[CONTENTS_PTR]] : !cir.ptr<!rec_Contents>), !cir.ptr<!void>
+// CIR:   %[[CONTENTS_CAST:.*]] = cir.cast bitcast %[[CONTENTS_PTR]] : !cir.ptr<!rec_Contents> -> !cir.ptr<!void>
 // CIR:   %[[OBJ_SIZE:.*]] = cir.const #cir.int<1> : !u64i
 // CIR:   cir.call @_ZdlPvm(%[[CONTENTS_CAST]], %[[OBJ_SIZE]]) nothrow : (!cir.ptr<!void>, !u64i) -> ()
 
diff --git a/clang/test/CIR/CodeGen/destructors.cpp b/clang/test/CIR/CodeGen/destructors.cpp
index fde0732a4352f..1ede1569a826f 100644
--- a/clang/test/CIR/CodeGen/destructors.cpp
+++ b/clang/test/CIR/CodeGen/destructors.cpp
@@ -64,7 +64,7 @@ void test_array_destructor() {
 // CIR: cir.func dso_local @_Z21test_array_destructorv()
 // CIR:   %[[ARR:.*]] = cir.alloca !cir.array<!rec_array_element x 5>, !cir.ptr<!cir.array<!rec_array_element x 5>>, ["arr", init]
 // CIR:   %[[ARR_PTR:.*]] = cir.alloca !cir.ptr<!rec_array_element>, !cir.ptr<!cir.ptr<!rec_array_element>>, ["arrayinit.temp", init]
-// CIR:   %[[BEGIN:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!rec_array_element x 5>>)
+// CIR:   %[[BEGIN:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!rec_array_element x 5>>
 // CIR:   cir.store{{.*}} %[[BEGIN]], %[[ARR_PTR]]
 // CIR:   %[[FIVE:.*]] = cir.const #cir.int<5> : !s64i
 // CIR:   %[[ARR_END:.*]] = cir.ptr_stride(%[[BEGIN]] : !cir.ptr<!rec_array_element>, %[[FIVE]] : !s64i)
@@ -80,7 +80,7 @@ void test_array_destructor() {
 // CIR:     cir.condition(%[[CMP]])
 // CIR:   }
 // CIR:   %[[FOUR:.*]] = cir.const #cir.int<4> : !u64i
-// CIR:   %[[BEGIN:.*]] = cir.cast(array_to_ptrdecay, %[[ARR]] : !cir.ptr<!cir.array<!rec_array_element x 5>>)
+// CIR:   %[[BEGIN:.*]] = cir.cast array_to_ptrdecay %[[ARR]] : !cir.ptr<!cir.array<!rec_array_element x 5>>
 // CIR:   %[[END:.*]] = cir.ptr_stride(%[[BEGIN]] : !cir.ptr<!rec_array_element>, %[[FOUR]] : !u64i)
 // CIR:   %[[ARR_PTR:.*]] = cir.alloca !cir.ptr<!rec_array_element>, !cir.ptr<!cir.ptr<!rec_array_element>>, ["__array_idx"]
 // CIR:   cir.store %[[END]], %[[ARR_PTR]]
diff --git a/clang/test/CIR/CodeGen/finegrain-bitfield-access.cpp b/clang/test/CIR/CodeGen/finegrain-bitfield-access.cpp
index 930b0a9c70059..d9ccd273ff3ba 100644
--- a/clang/test/CIR/CodeGen/finegrain-bitfield-access.cpp
+++ b/clang/test/CIR/CodeGen/finegrain-bitfield-access.cpp
@@ -70,7 +70,7 @@ void write8_1() {
 
 // CIR-LABEL: @_Z8write8_1v
 // CIR: [[CONST3:%.*]] = cir.const #cir.int<3> : !s32i
-// CIR: [[INT3:%.*]] = cir.cast(integral, [[CONST3]] : !s32i), !u32i
+// CIR: [[INT3:%.*]] = cir.cast integral [[CONST3]] : !s32i -> !u32i
 // CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f3"} : !cir.ptr<!rec_S1> -> !cir.ptr<!u8i>
 // CIR: cir.set_bitfield align(1) (#bfi_f3, [[MEMBER]] : !cir.ptr<!u8i>, [[INT3]] : !u32i) -> !u32i
 
@@ -116,7 +116,7 @@ void write8_2() {
 
 // CIR-LABEL: @_Z8write8_2v
 // CIR: [[CONST3:%.*]] = cir.const #cir.int<3> : !s32i
-// CIR: [[INT3:%.*]] = cir.cast(integral, [[CONST3]] : !s32i), !u32i
+// CIR: [[INT3:%.*]] = cir.cast integral [[CONST3]] : !s32i -> !u32i
 // CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[2] {name = "f5"} : !cir.ptr<!rec_S1> -> !cir.ptr<!u16i>
 // CIR: cir.set_bitfield align(2) (#bfi_f5, %3 : !cir.ptr<!u16i>, {{.*}} : !u32i) -> !u32i
 
@@ -141,7 +141,7 @@ unsigned read16_1() {
 // CIR-LABEL: @_Z8read16_1v
 // CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[0] {name = "f1"} : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
 // CIR: [[BITFI:%.*]] = cir.get_bitfield align(8) (#bfi_f1, [[MEMBER]] : !cir.ptr<!u16i>) -> !u64i
-// CIR: [[BFCAST:%.*]] = cir.cast(integral, [[BITFI]] : !u64i), !u32i
+// CIR: [[BFCAST:%.*]] = cir.cast integral [[BITFI]] : !u64i -> !u32i
 // CIR: cir.store [[BFCAST]], {{.*}} : !u32i, !cir.ptr<!u32i>
 // CIR: [[RET:%.*]] = cir.load {{.*}} : !cir.ptr<!u32i>, !u32i
 // CIR: cir.return [[RET]] : !u32i
@@ -167,7 +167,7 @@ unsigned read16_2() {
 // CIR-LABEL: @_Z8read16_2v
 // CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f2"} : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
 // CIR: [[BITFI:%.*]] = cir.get_bitfield align(2) (#bfi_f2, [[MEMBER]] : !cir.ptr<!u16i>) -> !u64i
-// CIR: [[BFCAST:%.*]] = cir.cast(integral, [[BITFI]] : !u64i), !u32i
+// CIR: [[BFCAST:%.*]] = cir.cast integral [[BITFI]] : !u64i -> !u32i
 // CIR: cir.store [[BFCAST]], {{.*}} : !u32i, !cir.ptr<!u32i>
 // CIR: [[RET:%.*]] = cir.load {{.*}} : !cir.ptr<!u32i>, !u32i
 // CIR: cir.return [[RET]] : !u32i
@@ -192,7 +192,7 @@ void write16_1() {
 
 // CIR-LABEL: @_Z9write16_1v
 // CIR: [[CONST5:%.*]] = cir.const #cir.int<5> : !s32i
-// CIR: [[INT5:%.*]] = cir.cast(integral, [[CONST5]] : !s32i), !u64i
+// CIR: [[INT5:%.*]] = cir.cast integral [[CONST5]] : !s32i -> !u64i
 // CIR: [[MEMBER:%.*]]  = cir.get_member {{.*}}[0] {name = "f1"} : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
 // CIR: cir.set_bitfield align(8) (#bfi_f1, [[MEMBER]] : !cir.ptr<!u16i>, [[INT5]] : !u64i) -> !u64i
 // CIR: cir.return
@@ -212,7 +212,7 @@ void write16_2() {
 
 // CIR-LABEL: @_Z9write16_2v
 // CIR: [[CONST5:%.*]] = cir.const #cir.int<5> : !s32i
-// CIR: [[INT5:%.*]] = cir.cast(integral, [[CONST5]] : !s32i), !u64i
+// CIR: [[INT5:%.*]] = cir.cast integral [[CONST5]] : !s32i -> !u64i
 // CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f2"} : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
 // CIR: cir.set_bitfield align(2) (#bfi_f2, [[MEMBER]] : !cir.ptr<!u16i>, {{.*}} : !u64i) -> !u64i
 // CIR: cir.return
@@ -232,7 +232,7 @@ unsigned read32_1() {
 // CIR-LABEL: @_Z8read32_1v
 // CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f3"} : !cir.ptr<!rec_S3> -> !cir.ptr<!u32i>
 // CIR: [[BITFI:%.*]] = cir.get_bitfield align(4) (#bfi_f3_1, [[MEMBER]] : !cir.ptr<!u32i>) -> !u64i
-// CIR: [[BFCAST:%.*]] = cir.cast(integral, [[BITFI]] : !u64i), !u32i
+// CIR: [[BFCAST:%.*]] = cir.cast integral [[BITFI]] : !u64i -> !u32i
 // CIR: cir.store [[BFCAST]], {{.*}} : !u32i, !cir.ptr<!u32i>
 // CIR: [[RET:%.*]] = cir.load {{.*}} : !cir.ptr<!u32i>, !u32i
 // CIR: cir.return [[RET]] : !u32i
@@ -257,7 +257,7 @@ void write32_1() {
 
 // CIR-LABEL: @_Z9write32_1v
 // CIR: [[CONST5:%.*]] = cir.const #cir.int<5> : !s32i
-// CIR: [[INT5:%.*]] = cir.cast(integral, [[CONST5]] : !s32i), !u64i
+// CIR: [[INT5:%.*]] = cir.cast integral [[CONST5]] : !s32i -> !u64i
 // CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f3"} : !cir.ptr<!rec_S3> -> !cir.ptr<!u32i>
 // CIR: cir.set_bitfield align(4) (#bfi_f3_1, [[MEMBER]] : !cir.ptr<!u32i>, [[INT5]] : !u64i) -> !u64i
 // CIR: cir.return
diff --git a/clang/test/CIR/CodeGen/if.cpp b/clang/test/CIR/CodeGen/if.cpp
index daaec8a61484d..823539b88834f 100644
--- a/clang/test/CIR/CodeGen/if.cpp
+++ b/clang/test/CIR/CodeGen/if.cpp
@@ -74,7 +74,7 @@ void if1(int a) {
 // CIR: cir.func{{.*}} @_Z3if1i(%arg0: !s32i loc({{.*}}))
 // CIR: cir.scope {
 // CIR:   %3 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
-// CIR:   %4 = cir.cast(int_to_bool, %3 : !s32i), !cir.bool
+// CIR:   %4 = cir.cast int_to_bool %3 : !s32i -> !cir.bool
 // CIR-NEXT:   cir.if %4 {
 // CIR-NEXT:     %5 = cir.const #cir.int<3> : !s32i
 // CIR-NEXT:     cir.store{{.*}} %5, %1 : !s32i, !cir.ptr<!s32i>
@@ -141,7 +141,7 @@ void if2(int a, bool b, bool c) {
 // CIR: cir.func{{.*}} @_Z3if2ibb(%arg0: !s32i loc({{.*}}), %arg1: !cir.bool loc({{.*}}), %arg2: !cir.bool loc({{.*}}))
 // CIR: cir.scope {
 // CIR:   %5 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
-// CIR:   %6 = cir.cast(int_to_bool, %5 : !s32i), !cir.bool
+// CIR:   %6 = cir.cast int_to_bool %5 : !s32i -> !cir.bool
 // CIR:   cir.if %6 {
 // CIR:     %7 = cir.const #cir.int<3> : !s32i
 // CIR:     cir.store{{.*}} %7, %3 : !s32i, !cir.ptr<!s32i>
@@ -267,7 +267,7 @@ int if_init() {
 // CIR:   %[[CONST42:.*]] = cir.const #cir.int<42> : !s32i
 // CIR:   cir.store{{.*}} %[[CONST42]], %[[X]] : !s32i, !cir.ptr<!s32i>
 // CIR:   %[[X_VAL:.*]] = cir.load{{.*}} %[[X]] : !cir.ptr<!s32i>, !s32i
-// CIR:   %[[COND:.*]] = cir.cast(int_to_bool, %[[X_VAL]] : !s32i), !cir.bool
+// CIR:   %[[COND:.*]] = cir.cast int_to_bool %[[X_VAL]] : !s32i -> !cir.bool
 // CIR:   cir.if %[[COND]] {
 // CIR:     %[[X_IF:.*]] = cir.load{{.*}} %[[X]] : !cir.ptr<!s32i>, !s32i
 // CIR:     %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
diff --git a/clang/test/CIR/CodeGen/int-to-bool.cpp b/clang/test/CIR/CodeGen/int-to-bool.cpp
index ad36af4552c2f..97b799b60d25f 100644
--- a/clang/test/CIR/CodeGen/int-to-bool.cpp
+++ b/clang/test/CIR/CodeGen/int-to-bool.cpp
@@ -10,7 +10,7 @@ bool f1(unsigned char c) {
 }
 
 // CIR: cir.func{{.*}} @_Z2f1h
-// CIR:   cir.cast(int_to_bool, %{{.*}} : !u8i), !cir.bool
+// CIR:   cir.cast int_to_bool %{{.*}} : !u8i -> !cir.bool
 
 // Note: The full zext/store/load/trunc sequence is checked here to show what
 // CIR is being lowered to. There's no need to check it for every function since
@@ -33,7 +33,7 @@ bool f2(short s) {
 }
 
 // CIR: cir.func{{.*}} @_Z2f2s
-// CIR:   cir.cast(int_to_bool, %{{.*}} : !s16i), !cir.bool
+// CIR:   cir.cast int_to_bool %{{.*}} : !s16i -> !cir.bool
 
 // LLVM: define{{.*}} i1 @_Z2f2s
 // LLVM:   %[[CMP:.*]] = icmp ne i16 %4, 0
@@ -48,7 +48,7 @@ bool f3(unsigned u) {
 }
 
 // CIR: cir.func{{.*}} @_Z2f3j
-// CIR:   cir.cast(int_to_bool, %{{.*}} : !u32i), !cir.bool
+// CIR:   cir.cast int_to_bool %{{.*}} : !u32i -> !cir.bool
 
 // LLVM: define{{.*}} i1 @_Z2f3j
 // LLVM:   %[[CMP:.*]] = icmp ne i32 %4, 0
@@ -63,7 +63,7 @@ bool f4(long l) {
 }
 
 // CIR: cir.func{{.*}} @_Z2f4l
-// CIR:   cir.cast(int_to_bool, %{{.*}} : !s64i), !cir.bool
+// CIR:   cir.cast int_to_bool %{{.*}} : !s64i -> !cir.bool
 
 // LLVM: define{{.*}} i1 @_Z2f4l
 // LLVM:   %[[CMP:.*]] = icmp ne i64 %4, 0
diff --git a/clang/test/CIR/CodeGen/loop.cpp b/clang/test/CIR/CodeGen/loop.cpp
index 0eba0bbc97c15..b30589cd1b6ec 100644
--- a/clang/test/CIR/CodeGen/loop.cpp
+++ b/clang/test/CIR/CodeGen/loop.cpp
@@ -205,10 +205,10 @@ void l4() {
 // CIR:     %[[N_ADDR:.*]] = cir.alloca {{.*}} ["n", init]
 // CIR:     cir.store{{.*}} %[[A_ADDR]], %[[RANGE_ADDR]]
 // CIR:     %[[RANGE_LOAD:.*]] = cir.load{{.*}} %[[RANGE_ADDR]]
-// CIR:     %[[RANGE_CAST:.*]] = cir.cast(array_to_ptrdecay, %[[RANGE_LOAD]] : {{.*}})
+// CIR:     %[[RANGE_CAST:.*]] = cir.cast array_to_ptrdecay %[[RANGE_LOAD]] : {{.*}}
 // CIR:     cir.store{{.*}} %[[RANGE_CAST]], %[[BEGIN_ADDR]]
 // CIR:     %[[BEGIN:.*]] = cir.load{{.*}} %[[RANGE_ADDR]]
-// CIR:     %[[BEGIN_CAST:.*]] = cir.cast(array_to_ptrdecay, %[[BEGIN]] : {{.*}})
+// CIR:     %[[BEGIN_CAST:.*]] = cir.cast array_to_ptrdecay %[[BEGIN]] : {{.*}}
 // CIR:     %[[TEN:.*]] = cir.const #cir.int<10>
 // CIR:     %[[END_PTR:.*]] = cir.ptr_stride(%[[BEGIN_CAST]] : {{.*}}, %[[TEN]] : {{.*}})
 // CIR:     cir.store{{.*}} %[[END_PTR]], %[[END_ADDR]]
@@ -312,7 +312,7 @@ void l5() {
 // CIR:     %[[BEGIN_ADDR:.*]] = cir.alloca {{.*}} ["__begin1", init]
 // CIR:     %[[END_ADDR:.*]] = cir.alloca {{.*}} ["__end1", init]
 // CIR:     %[[X_ADDR:.*]] = cir.alloca {{.*}} ["x", init]
-// CIR:     %[[ARR_CAST:.*]] = cir.cast(array_to_ptrdecay, %[[ARR_ADDR]] : {{.*}})
+// CIR:     %[[ARR_CAST:.*]] = cir.cast array_to_ptrdecay %[[ARR_ADDR]] : {{.*}}
 // CIR:     %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CIR:     cir.store{{.*}} %[[ONE]], %[[ARR_CAST]]
 // CIR:     %[[OFFSET1:.*]] = cir.const #cir.int<1> : !s64i
@@ -329,10 +329,10 @@ void l5() {
 // CIR:     cir.store{{.*}} %[[FOUR]], %[[STRIDE3]]
 // CIR:     cir.store{{.*}} %[[ARR_ADDR]], %[[RANGE_ADDR]]
 // CIR:     %[[RANGE_LOAD:.*]] = cir.load{{.*}} %[[RANGE_ADDR]]
-// CIR:     %[[RANGE_CAST:.*]] = cir.cast(array_to_ptrdecay, %[[RANGE_LOAD]] : {{.*}})
+// CIR:     %[[RANGE_CAST:.*]] = cir.cast array_to_ptrdecay %[[RANGE_LOAD]] : {{.*}}
 // CIR:     cir.store{{.*}} %[[RANGE_CAST]], %[[BEGIN_ADDR]]
 // CIR:     %[[BEGIN:.*]] = cir.load{{.*}} %[[RANGE_ADDR]]
-// CIR:     %[[BEGIN_CAST:.*]] = cir.cast(array_to_ptrdecay, %[[BEGIN]] : {{.*}})
+// CIR:     %[[BEGIN_CAST:.*]] = cir.cast array_to_ptrdecay %[[BEGIN]] : {{.*}}
 // CIR:     %[[FOUR:.*]] = cir.const #cir.int<4> : !s64i
 // CIR:     %[[END_PTR:.*]] = cir.ptr_stride(%[[BEGIN_CAST]] : {{.*}}, %[[FOUR]] : {{.*}})
 // CIR:     cir.store{{.*}} %[[END_PTR]], %[[END_ADDR]]
@@ -445,7 +445,7 @@ void test_do_while_false() {
 // CIR-NEXT:       cir.yield
 // CIR-NEXT:     } while {
 // CIR-NEXT:       %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
-// CIR-NEXT:       %[[FALSE:.*]] = cir.cast(int_to_bool, %[[ZERO]] : !s32i), !cir.bool
+// CIR-NEXT:       %[[FALSE:.*]] = cir.cast int_to_bool %[[ZERO]] : !s32i -> !cir.bool
 // CIR-NEXT:       cir.condition(%[[FALSE]])
 
 // LLVM: define{{.*}} void @_Z19test_do_while_falsev()
diff --git a/clang/test/CIR/CodeGen/new.cpp b/clang/test/CIR/CodeGen/new.cpp
index b14bf077cd154..91dae3f28c572 100644
--- a/clang/test/CIR/CodeGen/new.cpp
+++ b/clang/test/CIR/CodeGen/new.cpp
@@ -22,15 +22,15 @@ void test_basic_new() {
 // CHECK:   %[[PD_ADDR:.*]] = cir.alloca !cir.ptr<!cir.double>, !cir.ptr<!cir.ptr<!cir.double>>, ["pd", init]
 // CHECK:   %[[EIGHT:.*]] = cir.const #cir.int<8>
 // CHECK:   %[[NEW_S:.*]] = cir.call @_Znwm(%[[EIGHT]])
-// CHECK:   %[[NEW_S_PTR:.*]] = cir.cast(bitcast, %[[NEW_S]]
+// CHECK:   %[[NEW_S_PTR:.*]] = cir.cast bitcast %[[NEW_S]]
 // CHECK:   cir.store{{.*}} %[[NEW_S_PTR]], %[[PS_ADDR]]
 // CHECK:   %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK:   %[[NEW_INT:.*]] = cir.call @_Znwm(%[[FOUR]])
-// CHECK:   %[[NEW_INT_PTR:.*]] = cir.cast(bitcast, %[[NEW_INT]]
+// CHECK:   %[[NEW_INT_PTR:.*]] = cir.cast bitcast %[[NEW_INT]]
 // CHECK:   cir.store{{.*}} %[[NEW_INT_PTR]], %[[PN_ADDR]]
 // CHECK:   %[[EIGHT:.*]] = cir.const #cir.int<8>
 // CHECK:   %[[NEW_DOUBLE:.*]] = cir.call @_Znwm(%[[EIGHT]])
-// CHECK:   %[[NEW_DOUBLE_PTR:.*]] = cir.cast(bitcast, %[[NEW_DOUBLE]]
+// CHECK:   %[[NEW_DOUBLE_PTR:.*]] = cir.cast bitcast %[[NEW_DOUBLE]]
 // CHECK:   cir.store{{.*}} %[[NEW_DOUBLE_PTR]], %[[PD_ADDR]]
 // CHECK:   cir.return
 
@@ -68,13 +68,13 @@ void test_new_with_init() {
 // CHECK:   %[[PD_ADDR:.*]] = cir.alloca !cir.ptr<!cir.double>, !cir.ptr<!cir.ptr<!cir.double>>, ["pd", init]
 // CHECK:   %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK:   %[[NEW_INT:.*]] = cir.call @_Znwm(%[[FOUR]])
-// CHECK:   %[[NEW_INT_PTR:.*]] = cir.cast(bitcast, %[[NEW_INT]]
+// CHECK:   %[[NEW_INT_PTR:.*]] = cir.cast bitcast %[[NEW_INT]]
 // CHECK:   %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK:   cir.store{{.*}} %[[TWO]], %[[NEW_INT_PTR]]
 // CHECK:   cir.store{{.*}} %[[NEW_INT_PTR]], %[[PN_ADDR]]
 // CHECK:   %[[EIGHT:.*]] = cir.const #cir.int<8>
 // CHECK:   %[[NEW_DOUBLE:.*]] = cir.call @_Znwm(%[[EIGHT]])
-// CHECK:   %[[NEW_DOUBLE_PTR:.*]] = cir.cast(bitcast, %[[NEW_DOUBLE]]
+// CHECK:   %[[NEW_DOUBLE_PTR:.*]] = cir.cast bitcast %[[NEW_DOUBLE]]
 // CHECK:   %[[THREE:.*]] = cir.const #cir.fp<3.000000e+00>
 // CHECK:   cir.store{{.*}} %[[THREE]], %[[NEW_DOUBLE_PTR]]
 // CHECK:   cir.store{{.*}} %[[NEW_DOUBLE_PTR]], %[[PD_ADDR]]
@@ -119,12 +119,12 @@ void test_new_with_ctor() {
 // CHECK:   %[[PS2_2_ADDR:.*]] = cir.alloca !cir.ptr<!rec_S2>, !cir.ptr<!cir.ptr<!rec_S2>>, ["ps2_2", init]
 // CHECK:   %[[EIGHT:.*]] = cir.const #cir.int<8>
 // CHECK:   %[[NEW_S2:.*]] = cir.call @_Znwm(%[[EIGHT]])
-// CHECK:   %[[NEW_S2_PTR:.*]] = cir.cast(bitcast, %[[NEW_S2]]
+// CHECK:   %[[NEW_S2_PTR:.*]] = cir.cast bitcast %[[NEW_S2]]
 // CHECK:   cir.call @_ZN2S2C1Ev(%[[NEW_S2_PTR]])
 // CHECK:   cir.store{{.*}} %[[NEW_S2_PTR]], %[[PS2_ADDR]]
 // CHECK:   %[[EIGHT:.*]] = cir.const #cir.int<8>
 // CHECK:   %[[NEW_S2_2:.*]] = cir.call @_Znwm(%[[EIGHT]])
-// CHECK:   %[[NEW_S2_2_PTR:.*]] = cir.cast(bitcast, %[[NEW_S2_2]]
+// CHECK:   %[[NEW_S2_2_PTR:.*]] = cir.cast bitcast %[[NEW_S2_2]]
 // CHECK:   %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK:   %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK:   cir.call @_ZN2S2C1Eii(%[[NEW_S2_2_PTR]], %[[ONE]], %[[TWO]])
@@ -161,7 +161,7 @@ void test_new_with_complex_type() {
 // CHECK:   %0 = cir.alloca !cir.ptr<!cir.complex<!cir.float>>, !cir.ptr<!cir.ptr<!cir.complex<!cir.float>>>, ["a", init]
 // CHECK:   %1 = cir.const #cir.int<8> : !u64i
 // CHECK:   %2 = cir.call @_Znwm(%1) : (!u64i) -> !cir.ptr<!void>
-// CHECK:   %3 = cir.cast(bitcast, %2 : !cir.ptr<!void>), !cir.ptr<!cir.complex<!cir.float>>
+// CHECK:   %3 = cir.cast bitcast %2 : !cir.ptr<!void> -> !cir.ptr<!cir.complex<!cir.float>>
 // CHECK:   %4 = cir.const #cir.const_complex<#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float> : !cir.complex<!cir.float>
 // CHECK:   cir.store align(8) %4, %3 : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
 // CHECK:   cir.store align(8) %3, %0 : !cir.ptr<!cir.complex<!cir.float>>, !cir.ptr<!cir.ptr<!cir.complex<!cir.float>>>
diff --git a/clang/test/CIR/CodeGen/no-prototype.c b/clang/test/CIR/CodeGen/no-prototype.c
index 4be6a94c12129..728c4b80b95a2 100644
--- a/clang/test/CIR/CodeGen/no-prototype.c
+++ b/clang/test/CIR/CodeGen/no-prototype.c
@@ -51,7 +51,7 @@ int test3(int x) {
 // CHECK: cir.func dso_local @test3
   return noProto3(x);
   // CHECK:  [[GGO:%.*]] = cir.get_global @noProto3 : !cir.ptr<!cir.func<(...) -> !s32i>>
-  // CHECK:  [[CAST:%.*]] = cir.cast(bitcast, [[GGO]] : !cir.ptr<!cir.func<(...) -> !s32i>>), !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+  // CHECK:  [[CAST:%.*]] = cir.cast bitcast [[GGO]] : !cir.ptr<!cir.func<(...) -> !s32i>> -> !cir.ptr<!cir.func<(!s32i) -> !s32i>>
   // CHECK:  {{%.*}} = cir.call [[CAST]](%{{[0-9]+}}) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
 }
 
@@ -68,7 +68,7 @@ int noProto4() { return 0; }
 int test4(int x) {
   return noProto4(x); // Even if we know the definition, this should compile.
   // CHECK:  [[GGO:%.*]] = cir.get_global @noProto4 : !cir.ptr<!cir.func<() -> !s32i>>
-  // CHECK:  [[CAST:%.*]] = cir.cast(bitcast, [[GGO]] : !cir.ptr<!cir.func<() -> !s32i>>), !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+  // CHECK:  [[CAST:%.*]] = cir.cast bitcast [[GGO]] : !cir.ptr<!cir.func<() -> !s32i>> -> !cir.ptr<!cir.func<(!s32i) -> !s32i>>
   // CHECK:  {{%.*}} = cir.call [[CAST]]({{%.*}}) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
 }
 
@@ -77,7 +77,7 @@ int noProto5();
 int test5(int x) {
   return noProto5();
   // CHECK:  [[GGO:%.*]] = cir.get_global @noProto5 : !cir.ptr<!cir.func<(!s32i) -> !s32i>>
-  // CHECK:  [[CAST:%.*]] = cir.cast(bitcast, [[GGO]] : !cir.ptr<!cir.func<(!s32i) -> !s32i>>), !cir.ptr<!cir.func<() -> !s32i>>
+  // CHECK:  [[CAST:%.*]] = cir.cast bitcast [[GGO]] : !cir.ptr<!cir.func<(!s32i) -> !s32i>> -> !cir.ptr<!cir.func<() -> !s32i>>
   // CHECK:  {{%.*}} = cir.call [[CAST]]() : (!cir.ptr<!cir.func<() -> !s32i>>) -> !s32i
 }
 int noProto5(int x) { return x; }
diff --git a/clang/test/CIR/CodeGen/opaque.c b/clang/test/CIR/CodeGen/opaque.c
index 96ecdfc4cd978..73f6402e8a484 100644
--- a/clang/test/CIR/CodeGen/opaque.c
+++ b/clang/test/CIR/CodeGen/opaque.c
@@ -17,8 +17,8 @@ void foo2() {
 // CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
 // CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
 // CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
-// CIR: %[[A_REAL_BOOL:.*]] = cir.cast(float_to_bool, %[[A_REAL]] : !cir.float), !cir.bool
-// CIR: %[[A_IMAG_BOOL:.*]] = cir.cast(float_to_bool, %[[A_IMAG]] : !cir.float), !cir.bool
+// CIR: %[[A_REAL_BOOL:.*]] = cir.cast float_to_bool %[[A_REAL]] : !cir.float -> !cir.bool
+// CIR: %[[A_IMAG_BOOL:.*]] = cir.cast float_to_bool %[[A_IMAG]] : !cir.float -> !cir.bool
 // CIR: %[[CONST_TRUE:.*]] = cir.const #true
 // CIR: %[[COND:.*]] = cir.select if %[[A_REAL_BOOL]] then %[[CONST_TRUE]] else %[[A_IMAG_BOOL]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
 // CIR: %[[RESULT:.*]] = cir.ternary(%[[COND]], true {
diff --git a/clang/test/CIR/CodeGen/opaque.cpp b/clang/test/CIR/CodeGen/opaque.cpp
index a48c013e5c20b..028bfd9ef4cd0 100644
--- a/clang/test/CIR/CodeGen/opaque.cpp
+++ b/clang/test/CIR/CodeGen/opaque.cpp
@@ -35,8 +35,8 @@ void foo2() {
 // CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
 // CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
 // CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
-// CIR: %[[A_REAL_BOOL:.*]] = cir.cast(float_to_bool, %[[A_REAL]] : !cir.float), !cir.bool
-// CIR: %[[A_IMAG_BOOL:.*]] = cir.cast(float_to_bool, %[[A_IMAG]] : !cir.float), !cir.bool
+// CIR: %[[A_REAL_BOOL:.*]] = cir.cast float_to_bool %[[A_REAL]] : !cir.float -> !cir.bool
+// CIR: %[[A_IMAG_BOOL:.*]] = cir.cast float_to_bool %[[A_IMAG]] : !cir.float -> !cir.bool
 // CIR: %[[CONST_TRUE:.*]] = cir.const #true
 // CIR: %[[COND:.*]] = cir.select if %[[A_REAL_BOOL]] then %[[CONST_TRUE]] else %[[A_IMAG_BOOL]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
 // CIR: %[[RESULT:.*]] = cir.ternary(%[[COND]], true {
@@ -111,7 +111,7 @@ void foo3() {
 // CIR: %[[B_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b"]
 // CIR: %[[C_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["c", init]
 // CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!s32i>, !s32i
-// CIR: %[[A_BOOL:.*]] = cir.cast(int_to_bool, %[[TMP_A]] : !s32i), !cir.bool
+// CIR: %[[A_BOOL:.*]] = cir.cast int_to_bool %[[TMP_A]] : !s32i -> !cir.bool
 // CIR: %[[RESULT:.*]] = cir.ternary(%[[A_BOOL]], true {
 // CIR:   %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!s32i>, !s32i
 // CIR:   cir.yield %[[TMP_A]] : !s32i
diff --git a/clang/test/CIR/CodeGen/pointers.cpp b/clang/test/CIR/CodeGen/pointers.cpp
index dcfcc723f4da1..2c3dbb0fd6c58 100644
--- a/clang/test/CIR/CodeGen/pointers.cpp
+++ b/clang/test/CIR/CodeGen/pointers.cpp
@@ -24,7 +24,7 @@ void foo(int *iptr, char *cptr, unsigned ustride) {
   // Must convert unsigned stride to a signed one.
   iptr - ustride;
   // CHECK: %[[#STRIDE:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
-  // CHECK: %[[#SIGNSTRIDE:]] = cir.cast(integral, %[[#STRIDE]] : !u32i), !s32i
+  // CHECK: %[[#SIGNSTRIDE:]] = cir.cast integral %[[#STRIDE]] : !u32i -> !s32i
   // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#SIGNSTRIDE]]) : !s32i, !s32i
   // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s32i>
 
diff --git a/clang/test/CIR/CodeGen/ternary.cpp b/clang/test/CIR/CodeGen/ternary.cpp
index 781286a94cc2e..eb38ee3083e5c 100644
--- a/clang/test/CIR/CodeGen/ternary.cpp
+++ b/clang/test/CIR/CodeGen/ternary.cpp
@@ -69,7 +69,7 @@ int foo(int a, int b) {
 // CIR: [[ALOAD2:%.+]] = cir.load align(4) [[A]] : !cir.ptr<!s32i>, !s32i
 // CIR: cir.yield [[ALOAD2]] : !s32i
 // CIR: }) : (!cir.bool) -> !s32i
-// CIR: [[CAST:%.+]] = cir.cast(int_to_bool, [[TERNARY_RES]] : !s32i), !cir.bool
+// CIR: [[CAST:%.+]] = cir.cast int_to_bool [[TERNARY_RES]] : !s32i -> !cir.bool
 // CIR: cir.if [[CAST]] {
 // CIR: [[ONE:%.+]] = cir.const #cir.int<1> : !s32i
 // CIR: [[MINUS_ONE:%.+]] = cir.unary(minus, [[ONE]]) nsw : !s32i, !s32i
diff --git a/clang/test/CIR/CodeGen/unary.cpp b/clang/test/CIR/CodeGen/unary.cpp
index c37524bc8b2c9..ac1ae344c6b48 100644
--- a/clang/test/CIR/CodeGen/unary.cpp
+++ b/clang/test/CIR/CodeGen/unary.cpp
@@ -410,10 +410,10 @@ void chars(char c) {
 // CHECK: cir.func{{.*}} @_Z5charsc
 
   int c1 = +c;
-  // CHECK: %[[PROMO:.*]] = cir.cast(integral, %{{.+}} : !s8i), !s32i
+  // CHECK: %[[PROMO:.*]] = cir.cast integral %{{.+}} : !s8i -> !s32i
   // CHECK: cir.unary(plus, %[[PROMO]]) : !s32i, !s32i
   int c2 = -c;
-  // CHECK: %[[PROMO:.*]] = cir.cast(integral, %{{.+}} : !s8i), !s32i
+  // CHECK: %[[PROMO:.*]] = cir.cast integral %{{.+}} : !s8i -> !s32i
   // CHECK: cir.unary(minus, %[[PROMO]]) nsw : !s32i, !s32i
 
   // Chars can go through some integer promotion codegen paths even when not promoted.
@@ -431,9 +431,9 @@ _Float16 fp16UPlus(_Float16 f) {
 
 // CHECK: cir.func{{.*}} @_Z9fp16UPlusDF16_({{.*}}) -> !cir.f16
 // CHECK:   %[[INPUT:.*]] = cir.load{{.*}} %[[F:.*]]
-// CHECK:   %[[PROMOTED:.*]] = cir.cast(floating, %[[INPUT]] : !cir.f16), !cir.float
+// CHECK:   %[[PROMOTED:.*]] = cir.cast floating %[[INPUT]] : !cir.f16 -> !cir.float
 // CHECK:   %[[RESULT:.*]] = cir.unary(plus, %[[PROMOTED]])
-// CHECK:   %[[UNPROMOTED:.*]] = cir.cast(floating, %[[RESULT]] : !cir.float), !cir.f16
+// CHECK:   %[[UNPROMOTED:.*]] = cir.cast floating %[[RESULT]] : !cir.float -> !cir.f16
 
 // LLVM: define{{.*}} half @_Z9fp16UPlusDF16_({{.*}})
 // LLVM:   %[[F_LOAD:.*]] = load half, ptr %{{.*}}, align 2
@@ -451,9 +451,9 @@ _Float16 fp16UMinus(_Float16 f) {
 
 // CHECK: cir.func{{.*}} @_Z10fp16UMinusDF16_({{.*}}) -> !cir.f16
 // CHECK:   %[[INPUT:.*]] = cir.load{{.*}} %[[F:.*]]
-// CHECK:   %[[PROMOTED:.*]] = cir.cast(floating, %[[INPUT]] : !cir.f16), !cir.float
+// CHECK:   %[[PROMOTED:.*]] = cir.cast floating %[[INPUT]] : !cir.f16 -> !cir.float
 // CHECK:   %[[RESULT:.*]] = cir.unary(minus, %[[PROMOTED]])
-// CHECK:   %[[UNPROMOTED:.*]] = cir.cast(floating, %[[RESULT]] : !cir.float), !cir.f16
+// CHECK:   %[[UNPROMOTED:.*]] = cir.cast floating %[[RESULT]] : !cir.float -> !cir.f16
 
 // LLVM: define{{.*}} half @_Z10fp16UMinusDF16_({{.*}})
 // LLVM:   %[[F_LOAD:.*]] = load half, ptr %{{.*}}, align 2
@@ -482,24 +482,24 @@ void test_logical_not() {
 
 // CHECK: cir.func{{.*}} @_Z16test_logical_notv()
 // CHECK:   %[[A:.*]] = cir.load{{.*}} %[[A_ADDR:.*]] : !cir.ptr<!s32i>, !s32i
-// CHECK:   %[[A_BOOL:.*]] = cir.cast(int_to_bool, %[[A]] : !s32i), !cir.bool
+// CHECK:   %[[A_BOOL:.*]] = cir.cast int_to_bool %[[A]] : !s32i -> !cir.bool
 // CHECK:   %[[A_NOT:.*]] = cir.unary(not, %[[A_BOOL]]) : !cir.bool, !cir.bool
-// CHECK:   %[[A_CAST:.*]] = cir.cast(bool_to_int, %[[A_NOT]] : !cir.bool), !s32i
+// CHECK:   %[[A_CAST:.*]] = cir.cast bool_to_int %[[A_NOT]] : !cir.bool -> !s32i
 // CHECK:   cir.store{{.*}} %[[A_CAST]], %[[A_ADDR]] : !s32i, !cir.ptr<!s32i>
 // CHECK:   %[[B:.*]] = cir.load{{.*}} %[[B_ADDR:.*]] : !cir.ptr<!cir.bool>, !cir.bool
 // CHECK:   %[[B_NOT:.*]] = cir.unary(not, %[[B]]) : !cir.bool, !cir.bool
 // CHECK:   cir.store{{.*}} %[[B_NOT]], %[[B_ADDR]] : !cir.bool, !cir.ptr<!cir.bool>
 // CHECK:   %[[C:.*]] = cir.load{{.*}} %[[C_ADDR:.*]] : !cir.ptr<!cir.float>, !cir.float
-// CHECK:   %[[C_BOOL:.*]] = cir.cast(float_to_bool, %[[C]] : !cir.float), !cir.bool
+// CHECK:   %[[C_BOOL:.*]] = cir.cast float_to_bool %[[C]] : !cir.float -> !cir.bool
 // CHECK:   %[[C_NOT:.*]] = cir.unary(not, %[[C_BOOL]]) : !cir.bool, !cir.bool
-// CHECK:   %[[C_CAST:.*]] = cir.cast(bool_to_float, %[[C_NOT]] : !cir.bool), !cir.float
+// CHECK:   %[[C_CAST:.*]] = cir.cast bool_to_float %[[C_NOT]] : !cir.bool -> !cir.float
 // CHECK:   cir.store{{.*}} %[[C_CAST]], %[[C_ADDR]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK:   %[[P:.*]] = cir.load{{.*}} %[[P_ADDR:.*]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
-// CHECK:   %[[P_BOOL:.*]] = cir.cast(ptr_to_bool, %[[P]] : !cir.ptr<!s32i>), !cir.bool
+// CHECK:   %[[P_BOOL:.*]] = cir.cast ptr_to_bool %[[P]] : !cir.ptr<!s32i> -> !cir.bool
 // CHECK:   %[[P_NOT:.*]] = cir.unary(not, %[[P_BOOL]]) : !cir.bool, !cir.bool
 // CHECK:   cir.store{{.*}} %[[P_NOT]], %[[B_ADDR]] : !cir.bool, !cir.ptr<!cir.bool>
 // CHECK:   %[[D:.*]] = cir.load{{.*}} %[[D_ADDR:.*]] : !cir.ptr<!cir.double>, !cir.double
-// CHECK:   %[[D_BOOL:.*]] = cir.cast(float_to_bool, %[[D]] : !cir.double), !cir.bool
+// CHECK:   %[[D_BOOL:.*]] = cir.cast float_to_bool %[[D]] : !cir.double -> !cir.bool
 // CHECK:   %[[D_NOT:.*]] = cir.unary(not, %[[D_BOOL]]) : !cir.bool, !cir.bool
 // CHECK:   cir.store{{.*}} %[[D_NOT]], %[[B_ADDR]] : !cir.bool, !cir.ptr<!cir.bool>
 
@@ -566,10 +566,10 @@ void f16NestedUPlus() {
 // CHECK:  %[[A_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["a"]
 // CHECK:  %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init]
 // CHECK:  %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.f16>, !cir.f16
-// CHECK:  %[[A_F32:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.f16), !cir.float
+// CHECK:  %[[A_F32:.*]] = cir.cast floating %[[TMP_A]] : !cir.f16 -> !cir.float
 // CHECK:  %[[A_PLUS:.*]] = cir.unary(plus, %[[A_F32]]) : !cir.float, !cir.float
 // CHECK:  %[[RESULT_F32:.*]] = cir.unary(plus, %[[A_PLUS]]) : !cir.float, !cir.float
-// CHECK:  %[[RESULT:.*]] = cir.cast(floating, %[[RESULT_F32]] : !cir.float), !cir.f16
+// CHECK:  %[[RESULT:.*]] = cir.cast floating %[[RESULT_F32]] : !cir.float -> !cir.f16
 // CHECK:  cir.store{{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
 
 // LLVM: define{{.*}} void @_Z14f16NestedUPlusv()
@@ -597,10 +597,10 @@ void f16NestedUMinus() {
 // CHECK:  %[[A_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["a"]
 // CHECK:  %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init]
 // CHECK:  %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.f16>, !cir.f16
-// CHECK:  %[[A_F32:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.f16), !cir.float
+// CHECK:  %[[A_F32:.*]] = cir.cast floating %[[TMP_A]] : !cir.f16 -> !cir.float
 // CHECK:  %[[A_MINUS:.*]] = cir.unary(minus, %[[A_F32]]) : !cir.float, !cir.float
 // CHECK:  %[[RESULT_F32:.*]] = cir.unary(minus, %[[A_MINUS]]) : !cir.float, !cir.float
-// CHECK:  %[[RESULT:.*]] = cir.cast(floating, %[[RESULT_F32]] : !cir.float), !cir.f16
+// CHECK:  %[[RESULT:.*]] = cir.cast floating %[[RESULT_F32]] : !cir.float -> !cir.f16
 // CHECK:  cir.store{{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
 
 // LLVM: define{{.*}} void @_Z15f16NestedUMinusv()
diff --git a/clang/test/CIR/CodeGen/union.c b/clang/test/CIR/CodeGen/union.c
index 23e862b24517d..bda8e77b89048 100644
--- a/clang/test/CIR/CodeGen/union.c
+++ b/clang/test/CIR/CodeGen/union.c
@@ -116,7 +116,7 @@ void shouldGenerateUnionAccess(union U2 u) {
 // CIR-NEXT:   %[[U:.*]] = cir.alloca !rec_U2, !cir.ptr<!rec_U2>, ["u", init] {alignment = 8 : i64}
 // CIR-NEXT:   cir.store{{.*}} %[[ARG]], %[[U]] : !rec_U2, !cir.ptr<!rec_U2>
 // CIR-NEXT:   %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
-// CIR-NEXT:   %[[ZERO_CHAR:.*]] = cir.cast(integral, %[[ZERO]] : !s32i), !s8i
+// CIR-NEXT:   %[[ZERO_CHAR:.*]] = cir.cast integral %[[ZERO]] : !s32i -> !s8i
 // CIR-NEXT:   %[[B_PTR:.*]] = cir.get_member %[[U]][0] {name = "b"} : !cir.ptr<!rec_U2> -> !cir.ptr<!s8i>
 // CIR-NEXT:   cir.store{{.*}} %[[ZERO_CHAR]], %[[B_PTR]] : !s8i, !cir.ptr<!s8i>
 // CIR-NEXT:   %[[B_PTR2:.*]] = cir.get_member %[[U]][0] {name = "b"} : !cir.ptr<!rec_U2> -> !cir.ptr<!s8i>
@@ -174,10 +174,10 @@ void f3(union U3 u) {
 // CIR-NEXT:   %[[U:.*]] = cir.alloca !rec_U3, !cir.ptr<!rec_U3>, ["u", init] {alignment = 1 : i64}
 // CIR-NEXT:   cir.store{{.*}} %[[ARG]], %[[U]] : !rec_U3, !cir.ptr<!rec_U3>
 // CIR-NEXT:   %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
-// CIR-NEXT:   %[[ZERO_CHAR:.*]] = cir.cast(integral, %[[ZERO]] : !s32i), !s8i
+// CIR-NEXT:   %[[ZERO_CHAR:.*]] = cir.cast integral %[[ZERO]] : !s32i -> !s8i
 // CIR-NEXT:   %[[IDX:.*]] = cir.const #cir.int<2> : !s32i
 // CIR-NEXT:   %[[C_PTR:.*]] = cir.get_member %[[U]][0] {name = "c"} : !cir.ptr<!rec_U3> -> !cir.ptr<!cir.array<!s8i x 5>>
-// CIR-NEXT:   %[[C_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[C_PTR]] : !cir.ptr<!cir.array<!s8i x 5>>), !cir.ptr<!s8i>
+// CIR-NEXT:   %[[C_DECAY:.*]] = cir.cast array_to_ptrdecay %[[C_PTR]] : !cir.ptr<!cir.array<!s8i x 5>> -> !cir.ptr<!s8i>
 // CIR-NEXT:   %[[ELEM_PTR:.*]] = cir.ptr_stride(%[[C_DECAY]] : !cir.ptr<!s8i>, %[[IDX]] : !s32i), !cir.ptr<!s8i>
 // CIR-NEXT:   cir.store{{.*}} %[[ZERO_CHAR]], %[[ELEM_PTR]] : !s8i, !cir.ptr<!s8i>
 // CIR-NEXT:   cir.return
@@ -206,10 +206,10 @@ void f5(union U4 u) {
 // CIR-NEXT:   %[[U:.*]] = cir.alloca !rec_U4, !cir.ptr<!rec_U4>, ["u", init] {alignment = 4 : i64}
 // CIR-NEXT:   cir.store{{.*}} %[[ARG]], %[[U]] : !rec_U4, !cir.ptr<!rec_U4>
 // CIR-NEXT:   %[[CHAR_VAL:.*]] = cir.const #cir.int<65> : !s32i
-// CIR-NEXT:   %[[CHAR_CAST:.*]] = cir.cast(integral, %[[CHAR_VAL]] : !s32i), !s8i
+// CIR-NEXT:   %[[CHAR_CAST:.*]] = cir.cast integral %[[CHAR_VAL]] : !s32i -> !s8i
 // CIR-NEXT:   %[[IDX:.*]] = cir.const #cir.int<4> : !s32i
 // CIR-NEXT:   %[[C_PTR:.*]] = cir.get_member %[[U]][0] {name = "c"} : !cir.ptr<!rec_U4> -> !cir.ptr<!cir.array<!s8i x 5>>
-// CIR-NEXT:   %[[C_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[C_PTR]] : !cir.ptr<!cir.array<!s8i x 5>>), !cir.ptr<!s8i>
+// CIR-NEXT:   %[[C_DECAY:.*]] = cir.cast array_to_ptrdecay %[[C_PTR]] : !cir.ptr<!cir.array<!s8i x 5>> -> !cir.ptr<!s8i>
 // CIR-NEXT:   %[[ELEM_PTR:.*]] = cir.ptr_stride(%[[C_DECAY]] : !cir.ptr<!s8i>, %[[IDX]] : !s32i), !cir.ptr<!s8i>
 // CIR-NEXT:   cir.store{{.*}} %[[CHAR_CAST]], %[[ELEM_PTR]] : !s8i, !cir.ptr<!s8i>
 // CIR-NEXT:   cir.return
diff --git a/clang/test/CIR/CodeGen/var_arg.c b/clang/test/CIR/CodeGen/var_arg.c
index e9c4acb15d009..f5b92c61e11ad 100644
--- a/clang/test/CIR/CodeGen/var_arg.c
+++ b/clang/test/CIR/CodeGen/var_arg.c
@@ -23,13 +23,13 @@ int varargs(int count, ...) {
 // CIR:   %[[VAAREA:.+]] = cir.alloca !cir.array<!rec___va_list_tag x 1>, !cir.ptr<!cir.array<!rec___va_list_tag x 1>>, ["args"]
 // CIR:   %[[RES_ADDR:.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["res", init]
 // CIR:   cir.store %arg0, %[[COUNT_ADDR]] : !s32i, !cir.ptr<!s32i>
-// CIR:   %[[VA_PTR0:.+]] = cir.cast(array_to_ptrdecay, %[[VAAREA]] : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>), !cir.ptr<!rec___va_list_tag>
+// CIR:   %[[VA_PTR0:.+]] = cir.cast array_to_ptrdecay %[[VAAREA]] : !cir.ptr<!cir.array<!rec___va_list_tag x 1>> -> !cir.ptr<!rec___va_list_tag>
 // CIR:   %[[COUNT_VAL:.+]] = cir.load{{.*}} %[[COUNT_ADDR]] : !cir.ptr<!s32i>, !s32i
 // CIR:   cir.va_start %[[VA_PTR0]] %[[COUNT_VAL]] : !cir.ptr<!rec___va_list_tag>, !s32i
-// CIR:   %[[VA_PTR1:.+]] = cir.cast(array_to_ptrdecay, %[[VAAREA]] : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>), !cir.ptr<!rec___va_list_tag>
+// CIR:   %[[VA_PTR1:.+]] = cir.cast array_to_ptrdecay %[[VAAREA]] : !cir.ptr<!cir.array<!rec___va_list_tag x 1>> -> !cir.ptr<!rec___va_list_tag>
 // CIR:   %[[VA_ARG:.+]] = cir.va_arg %[[VA_PTR1]] : (!cir.ptr<!rec___va_list_tag>) -> !s32i
 // CIR:   cir.store{{.*}} %[[VA_ARG]], %[[RES_ADDR]] : !s32i, !cir.ptr<!s32i>
-// CIR:   %[[VA_PTR2:.+]] = cir.cast(array_to_ptrdecay, %[[VAAREA]] : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>), !cir.ptr<!rec___va_list_tag>
+// CIR:   %[[VA_PTR2:.+]] = cir.cast array_to_ptrdecay %[[VAAREA]] : !cir.ptr<!cir.array<!rec___va_list_tag x 1>> -> !cir.ptr<!rec___va_list_tag>
 // CIR:   cir.va_end %[[VA_PTR2]] : !cir.ptr<!rec___va_list_tag>
 // CIR:   %[[RESULT:.+]] = cir.load{{.*}} %[[RES_ADDR]] : !cir.ptr<!s32i>, !s32i
 // CIR:   cir.store %[[RESULT]], %[[RET_ADDR]] : !s32i, !cir.ptr<!s32i>
@@ -99,13 +99,13 @@ int stdarg_start(int count, ...) {
 // CIR:   %[[VAAREA:.+]] = cir.alloca !cir.array<!rec___va_list_tag x 1>, !cir.ptr<!cir.array<!rec___va_list_tag x 1>>, ["args"]
 // CIR:   %[[RES_ADDR:.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["res", init]
 // CIR:   cir.store %arg0, %[[COUNT_ADDR]] : !s32i, !cir.ptr<!s32i>
-// CIR:   %[[VA_PTR0:.+]] = cir.cast(array_to_ptrdecay, %[[VAAREA]] : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>), !cir.ptr<!rec___va_list_tag>
+// CIR:   %[[VA_PTR0:.+]] = cir.cast array_to_ptrdecay %[[VAAREA]] : !cir.ptr<!cir.array<!rec___va_list_tag x 1>> -> !cir.ptr<!rec___va_list_tag>
 // CIR:   %[[C12345:.+]] = cir.const #cir.int<12345> : !s32i
 // CIR:   cir.va_start %[[VA_PTR0]] %[[C12345]] : !cir.ptr<!rec___va_list_tag>, !s32i
-// CIR:   %[[VA_PTR1:.+]] = cir.cast(array_to_ptrdecay, %[[VAAREA]] : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>), !cir.ptr<!rec___va_list_tag>
+// CIR:   %[[VA_PTR1:.+]] = cir.cast array_to_ptrdecay %[[VAAREA]] : !cir.ptr<!cir.array<!rec___va_list_tag x 1>> -> !cir.ptr<!rec___va_list_tag>
 // CIR:   %[[VA_ARG:.+]] = cir.va_arg %[[VA_PTR1]] : (!cir.ptr<!rec___va_list_tag>) -> !s32i
 // CIR:   cir.store{{.*}} %[[VA_ARG]], %[[RES_ADDR]] : !s32i, !cir.ptr<!s32i>
-// CIR:   %[[VA_PTR2:.+]] = cir.cast(array_to_ptrdecay, %[[VAAREA]] : !cir.ptr<!cir.array<!rec___va_list_tag x 1>>), !cir.ptr<!rec___va_list_tag>
+// CIR:   %[[VA_PTR2:.+]] = cir.cast array_to_ptrdecay %[[VAAREA]] : !cir.ptr<!cir.array<!rec___va_list_tag x 1>> -> !cir.ptr<!rec___va_list_tag>
 // CIR:   cir.va_end %[[VA_PTR2]] : !cir.ptr<!rec___va_list_tag>
 // CIR:   %[[RESULT:.+]] = cir.load{{.*}} %[[RES_ADDR]] : !cir.ptr<!s32i>, !s32i
 // CIR:   cir.store %[[RESULT]], %[[RET_ADDR]] : !s32i, !cir.ptr<!s32i>
diff --git a/clang/test/CIR/CodeGen/variable-decomposition.cpp b/clang/test/CIR/CodeGen/variable-decomposition.cpp
index 40dfe73c411c9..ba59109ab625f 100644
--- a/clang/test/CIR/CodeGen/variable-decomposition.cpp
+++ b/clang/test/CIR/CodeGen/variable-decomposition.cpp
@@ -27,7 +27,7 @@ float function() {
 // CIR:  cir.store{{.*}} %[[TWO_FP]], %[[MEMBER_B]]
 // CIR:  %[[MEMBER_A:.+]] = cir.get_member %[[STRUCT]][0] {name = "a"} : !cir.ptr<!rec_some_struct> -> !cir.ptr<!s32i>
 // CIR:  %[[LOAD_A:.+]] = cir.load align(4) %[[MEMBER_A]] : !cir.ptr<!s32i>, !s32i
-// CIR:  %[[CAST_A:.+]] = cir.cast(int_to_float, %[[LOAD_A]] : !s32i), !cir.float
+// CIR:  %[[CAST_A:.+]] = cir.cast int_to_float %[[LOAD_A]] : !s32i -> !cir.float
 // CIR:  %[[MEMBER_B:.+]] = cir.get_member %[[STRUCT]][1] {name = "b"} : !cir.ptr<!rec_some_struct> -> !cir.ptr<!cir.float>
 // CIR:  %[[LOAD_B:.+]] = cir.load align(4) %[[MEMBER_B]] : !cir.ptr<!cir.float>, !cir.float
 // CIR:  %[[ADD:.+]] = cir.binop(add, %[[CAST_A]], %[[LOAD_B]]) : !cir.float
diff --git a/clang/test/CIR/CodeGen/vbase.cpp b/clang/test/CIR/CodeGen/vbase.cpp
index 4d57f8ea74e0c..86469c5d6ae7d 100644
--- a/clang/test/CIR/CodeGen/vbase.cpp
+++ b/clang/test/CIR/CodeGen/vbase.cpp
@@ -62,15 +62,15 @@ void ppp() { B b; }
 // CIR:   cir.call @_ZN7DerivedC1Ev(%[[D]]) nothrow : (!cir.ptr<!rec_Derived>) -> ()
 // CIR:   %[[VPTR_PTR:.+]] = cir.vtable.get_vptr %[[D]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!cir.vptr>
 // CIR:   %[[VPTR:.+]] = cir.load {{.*}} %[[VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
-// CIR:   %[[VPTR_I8:.+]] = cir.cast(bitcast, %[[VPTR]] : !cir.vptr), !cir.ptr<!u8i>
+// CIR:   %[[VPTR_I8:.+]] = cir.cast bitcast %[[VPTR]] : !cir.vptr -> !cir.ptr<!u8i>
 // CIR:   %[[NEG32:.+]] = cir.const #cir.int<-32> : !s64i
 // CIR:   %[[ADJ_VPTR_I8:.+]] = cir.ptr_stride(%[[VPTR_I8]] : !cir.ptr<!u8i>, %[[NEG32]] : !s64i), !cir.ptr<!u8i>
-// CIR:   %[[OFFSET_PTR:.+]] = cir.cast(bitcast, %[[ADJ_VPTR_I8]] : !cir.ptr<!u8i>), !cir.ptr<!s64i>
+// CIR:   %[[OFFSET_PTR:.+]] = cir.cast bitcast %[[ADJ_VPTR_I8]] : !cir.ptr<!u8i> -> !cir.ptr<!s64i>
 // CIR:   %[[OFFSET:.+]] = cir.load {{.*}} %[[OFFSET_PTR]] : !cir.ptr<!s64i>, !s64i
-// CIR:   %[[D_I8:.+]] = cir.cast(bitcast, %[[D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!u8i>
+// CIR:   %[[D_I8:.+]] = cir.cast bitcast %[[D]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!u8i>
 // CIR:   %[[ADJ_THIS_I8:.+]] = cir.ptr_stride(%[[D_I8]] : !cir.ptr<!u8i>, %[[OFFSET]] : !s64i), !cir.ptr<!u8i>
-// CIR:   %[[ADJ_THIS_D:.+]] = cir.cast(bitcast, %[[ADJ_THIS_I8]] : !cir.ptr<!u8i>), !cir.ptr<!rec_Derived>
-// CIR:   %[[BASE_THIS:.+]] = cir.cast(bitcast, %[[ADJ_THIS_D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!rec_Base>
+// CIR:   %[[ADJ_THIS_D:.+]] = cir.cast bitcast %[[ADJ_THIS_I8]] : !cir.ptr<!u8i> -> !cir.ptr<!rec_Derived>
+// CIR:   %[[BASE_THIS:.+]] = cir.cast bitcast %[[ADJ_THIS_D]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!rec_Base>
 // CIR:   %[[BASE_VPTR_PTR:.+]] = cir.vtable.get_vptr %[[BASE_THIS]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr>
 // CIR:   %[[BASE_VPTR:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
 // CIR:   %[[SLOT_PTR:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>
diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp
index 8bca48d8ffe0c..2fd493f87c1ee 100644
--- a/clang/test/CIR/CodeGen/vector-ext.cpp
+++ b/clang/test/CIR/CodeGen/vector-ext.cpp
@@ -1048,7 +1048,7 @@ void foo17() {
 
 // CIR: %[[VEC_A:.*]] = cir.alloca !cir.vector<2 x !cir.double>, !cir.ptr<!cir.vector<2 x !cir.double>>, ["a"]
 // CIR: %[[TMP:.*]] = cir.load{{.*}} %[[VEC_A]] : !cir.ptr<!cir.vector<2 x !cir.double>>, !cir.vector<2 x !cir.double>
-// CIR: %[[RES:.*]] = cir.cast(float_to_int, %[[TMP]] : !cir.vector<2 x !cir.double>), !cir.vector<2 x !u16i>
+// CIR: %[[RES:.*]] = cir.cast float_to_int %[[TMP]] : !cir.vector<2 x !cir.double> -> !cir.vector<2 x !u16i>
 
 // LLVM: %[[VEC_A:.*]] = alloca <2 x double>, i64 1, align 16
 // LLVM: %[[TMP:.*]] = load <2 x double>, ptr %[[VEC_A]], align 16
@@ -1228,11 +1228,11 @@ void foo24() {
 // CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["b"]
 // CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["c", init]
 // CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
-// CIR: %[[TMP_A_F16:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
+// CIR: %[[TMP_A_F16:.*]] = cir.cast floating %[[TMP_A]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
 // CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
-// CIR: %[[TMP_B_F16:.*]] = cir.cast(floating, %[[TMP_B]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
+// CIR: %[[TMP_B_F16:.*]] = cir.cast floating %[[TMP_B]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
 // CIR: %[[RESULT:.*]] = cir.binop(add, %[[TMP_A_F16]], %[[TMP_B_F16]]) : !cir.vector<4 x !cir.float>
-// CIR: %[[RESULT_VF16:.*]] = cir.cast(floating, %[[RESULT]] : !cir.vector<4 x !cir.float>), !cir.vector<4 x !cir.f16>
+// CIR: %[[RESULT_VF16:.*]] = cir.cast floating %[[RESULT]] : !cir.vector<4 x !cir.float> -> !cir.vector<4 x !cir.f16>
 // CIR: cir.store{{.*}} %[[RESULT_VF16]], %[[C_ADDR]] : !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>
 
 // LLVM: %[[A_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp
index f242779502148..86551d277fa71 100644
--- a/clang/test/CIR/CodeGen/vector.cpp
+++ b/clang/test/CIR/CodeGen/vector.cpp
@@ -1035,7 +1035,7 @@ void foo17() {
 
 // CIR: %[[VEC_A:.*]] = cir.alloca !cir.vector<2 x !cir.double>, !cir.ptr<!cir.vector<2 x !cir.double>>, ["a"]
 // CIR: %[[TMP:.*]] = cir.load{{.*}} %[[VEC_A]] : !cir.ptr<!cir.vector<2 x !cir.double>>, !cir.vector<2 x !cir.double>
-// CIR: %[[RES:.*]] = cir.cast(float_to_int, %[[TMP]] : !cir.vector<2 x !cir.double>), !cir.vector<2 x !u16i>
+// CIR: %[[RES:.*]] = cir.cast float_to_int %[[TMP]] : !cir.vector<2 x !cir.double> -> !cir.vector<2 x !u16i>
 
 // LLVM: %[[VEC_A:.*]] = alloca <2 x double>, i64 1, align 16
 // LLVM: %[[TMP:.*]] = load <2 x double>, ptr %[[VEC_A]], align 16
@@ -1270,11 +1270,11 @@ void foo27() {
 // CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["b"]
 // CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["c", init]
 // CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
-// CIR: %[[TMP_A_F16:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
+// CIR: %[[TMP_A_F16:.*]] = cir.cast floating %[[TMP_A]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
 // CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
-// CIR: %[[TMP_B_F16:.*]] = cir.cast(floating, %[[TMP_B]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
+// CIR: %[[TMP_B_F16:.*]] = cir.cast floating %[[TMP_B]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
 // CIR: %[[RESULT:.*]] = cir.binop(add, %[[TMP_A_F16]], %[[TMP_B_F16]]) : !cir.vector<4 x !cir.float>
-// CIR: %[[RESULT_VF16:.*]] = cir.cast(floating, %[[RESULT]] : !cir.vector<4 x !cir.float>), !cir.vector<4 x !cir.f16>
+// CIR: %[[RESULT_VF16:.*]] = cir.cast floating %[[RESULT]] : !cir.vector<4 x !cir.float> -> !cir.vector<4 x !cir.f16>
 // CIR: cir.store{{.*}} %[[RESULT_VF16]], %[[C_ADDR]] : !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>
 
 // LLVM: %[[A_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
diff --git a/clang/test/CIR/CodeGen/vtt.cpp b/clang/test/CIR/CodeGen/vtt.cpp
index baab972bce696..f47da41e5b200 100644
--- a/clang/test/CIR/CodeGen/vtt.cpp
+++ b/clang/test/CIR/CodeGen/vtt.cpp
@@ -281,23 +281,23 @@ D::D() {}
 // CIR-COMMON:        %[[THIS:.*]] = cir.load %[[THIS_ADDR]]
 // CIR-COMMON:        %[[VTT:.*]] = cir.load{{.*}} %[[VTT_ADDR]]
 // CIR-COMMON:        %[[VTT_ADDR_POINT:.*]] = cir.vtt.address_point %[[VTT]] : !cir.ptr<!cir.ptr<!void>>, offset = 0 -> !cir.ptr<!cir.ptr<!void>>
-// CIR-COMMON:        %[[VPTR_ADDR:.*]] = cir.cast(bitcast, %[[VTT_ADDR_POINT]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!cir.vptr>
+// CIR-COMMON:        %[[VPTR_ADDR:.*]] = cir.cast bitcast %[[VTT_ADDR_POINT]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
 // CIR-COMMON:        %[[VPTR:.*]] = cir.load{{.*}} %[[VPTR_ADDR]]
 // CIR-COMMON:        %[[B_VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[THIS]]
 // CIR-COMMON:        cir.store{{.*}} %[[VPTR]], %[[B_VPTR_ADDR]]
 // CIR-COMMON:        %[[B_VTT_ADDR_POINT:.*]] = cir.vtt.address_point %[[VTT]] : !cir.ptr<!cir.ptr<!void>>, offset = 1 -> !cir.ptr<!cir.ptr<!void>>
-// CIR-COMMON:        %[[B_VPTR_ADDR:.*]] = cir.cast(bitcast, %[[B_VTT_ADDR_POINT]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!cir.vptr>
+// CIR-COMMON:        %[[B_VPTR_ADDR:.*]] = cir.cast bitcast %[[B_VTT_ADDR_POINT]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
 // CIR-COMMON:        %[[B_VPTR:.*]] = cir.load{{.*}} %[[B_VPTR_ADDR]]
 // CIR-COMMON:        %[[B_VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[THIS]]
 // CIR-COMMON:        %[[VPTR:.*]] = cir.load{{.*}} %[[B_VPTR_ADDR]]
-// CIR-COMMON:        %[[VPTR_ADDR2:.*]] = cir.cast(bitcast, %[[VPTR]] : !cir.vptr), !cir.ptr<!u8i>
+// CIR-COMMON:        %[[VPTR_ADDR2:.*]] = cir.cast bitcast %[[VPTR]] : !cir.vptr -> !cir.ptr<!u8i>
 // CIR-COMMON:        %[[CONST_24:.*]] = cir.const #cir.int<-24>
 // CIR-COMMON:        %[[BASE_OFFSET_ADDR:.*]] = cir.ptr_stride(%[[VPTR_ADDR2]] : !cir.ptr<!u8i>, %[[CONST_24]] : !s64i), !cir.ptr<!u8i>
-// CIR-COMMON:        %[[BASE_OFFSET_PTR:.*]] = cir.cast(bitcast, %[[BASE_OFFSET_ADDR]] : !cir.ptr<!u8i>), !cir.ptr<!s64i>
+// CIR-COMMON:        %[[BASE_OFFSET_PTR:.*]] = cir.cast bitcast %[[BASE_OFFSET_ADDR]] : !cir.ptr<!u8i> -> !cir.ptr<!s64i>
 // CIR-COMMON:        %[[BASE_OFFSET:.*]] = cir.load{{.*}} %[[BASE_OFFSET_PTR]] : !cir.ptr<!s64i>, !s64i
-// CIR-COMMON:        %[[THIS_PTR:.*]] = cir.cast(bitcast, %[[THIS]] : !cir.ptr<!rec_B>), !cir.ptr<!u8i>
+// CIR-COMMON:        %[[THIS_PTR:.*]] = cir.cast bitcast %[[THIS]] : !cir.ptr<!rec_B> -> !cir.ptr<!u8i>
 // CIR-COMMON:        %[[BASE_PTR:.*]] = cir.ptr_stride(%[[THIS_PTR]] : !cir.ptr<!u8i>, %[[BASE_OFFSET]] : !s64i), !cir.ptr<!u8i>
-// CIR-COMMON:        %[[BASE_CAST:.*]] = cir.cast(bitcast, %[[BASE_PTR]] : !cir.ptr<!u8i>), !cir.ptr<!rec_B>
+// CIR-COMMON:        %[[BASE_CAST:.*]] = cir.cast bitcast %[[BASE_PTR]] : !cir.ptr<!u8i> -> !cir.ptr<!rec_B>
 // CIR-COMMON:        %[[BASE_VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[BASE_CAST]]
 // CIR-COMMON:        cir.store{{.*}} %[[B_VPTR]], %[[BASE_VPTR_ADDR]]
 
@@ -347,23 +347,23 @@ D::D() {}
 // CIR-COMMON:        %[[THIS:.*]] = cir.load %[[THIS_ADDR]]
 // CIR-COMMON:        %[[VTT:.*]] = cir.load{{.*}} %[[VTT_ADDR]]
 // CIR-COMMON:        %[[VTT_ADDR_POINT:.*]] = cir.vtt.address_point %[[VTT]] : !cir.ptr<!cir.ptr<!void>>, offset = 0 -> !cir.ptr<!cir.ptr<!void>>
-// CIR-COMMON:        %[[VPTR_ADDR:.*]] = cir.cast(bitcast, %[[VTT_ADDR_POINT]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!cir.vptr>
+// CIR-COMMON:        %[[VPTR_ADDR:.*]] = cir.cast bitcast %[[VTT_ADDR_POINT]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
 // CIR-COMMON:        %[[VPTR:.*]] = cir.load{{.*}} %[[VPTR_ADDR]]
 // CIR-COMMON:        %[[C_VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[THIS]]
 // CIR-COMMON:        cir.store{{.*}} %[[VPTR]], %[[C_VPTR_ADDR]]
 // CIR-COMMON:        %[[C_VTT_ADDR_POINT:.*]] = cir.vtt.address_point %[[VTT]] : !cir.ptr<!cir.ptr<!void>>, offset = 1 -> !cir.ptr<!cir.ptr<!void>>
-// CIR-COMMON:        %[[C_VPTR_ADDR:.*]] = cir.cast(bitcast, %[[C_VTT_ADDR_POINT]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!cir.vptr>
+// CIR-COMMON:        %[[C_VPTR_ADDR:.*]] = cir.cast bitcast %[[C_VTT_ADDR_POINT]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
 // CIR-COMMON:        %[[C_VPTR:.*]] = cir.load{{.*}} %[[C_VPTR_ADDR]]
 // CIR-COMMON:        %[[C_VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[THIS]]
 // CIR-COMMON:        %[[VPTR:.*]] = cir.load{{.*}} %[[C_VPTR_ADDR]]
-// CIR-COMMON:        %[[VPTR_ADDR2:.*]] = cir.cast(bitcast, %[[VPTR]] : !cir.vptr), !cir.ptr<!u8i>
+// CIR-COMMON:        %[[VPTR_ADDR2:.*]] = cir.cast bitcast %[[VPTR]] : !cir.vptr -> !cir.ptr<!u8i>
 // CIR-COMMON:        %[[CONST_24:.*]] = cir.const #cir.int<-24>
 // CIR-COMMON:        %[[BASE_OFFSET_ADDR:.*]] = cir.ptr_stride(%[[VPTR_ADDR2]] : !cir.ptr<!u8i>, %[[CONST_24]] : !s64i), !cir.ptr<!u8i>
-// CIR-COMMON:        %[[BASE_OFFSET_PTR:.*]] = cir.cast(bitcast, %[[BASE_OFFSET_ADDR]] : !cir.ptr<!u8i>), !cir.ptr<!s64i>
+// CIR-COMMON:        %[[BASE_OFFSET_PTR:.*]] = cir.cast bitcast %[[BASE_OFFSET_ADDR]] : !cir.ptr<!u8i> -> !cir.ptr<!s64i>
 // CIR-COMMON:        %[[BASE_OFFSET:.*]] = cir.load{{.*}} %[[BASE_OFFSET_PTR]] : !cir.ptr<!s64i>, !s64i
-// CIR-COMMON:        %[[THIS_PTR:.*]] = cir.cast(bitcast, %[[THIS]] : !cir.ptr<!rec_C>), !cir.ptr<!u8i>
+// CIR-COMMON:        %[[THIS_PTR:.*]] = cir.cast bitcast %[[THIS]] : !cir.ptr<!rec_C> -> !cir.ptr<!u8i>
 // CIR-COMMON:        %[[BASE_PTR:.*]] = cir.ptr_stride(%[[THIS_PTR]] : !cir.ptr<!u8i>, %[[BASE_OFFSET]] : !s64i), !cir.ptr<!u8i>
-// CIR-COMMON:        %[[BASE_CAST:.*]] = cir.cast(bitcast, %[[BASE_PTR]] : !cir.ptr<!u8i>), !cir.ptr<!rec_C>
+// CIR-COMMON:        %[[BASE_CAST:.*]] = cir.cast bitcast %[[BASE_PTR]] : !cir.ptr<!u8i> -> !cir.ptr<!rec_C>
 // CIR-COMMON:        %[[BASE_VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[BASE_CAST]]
 // CIR-COMMON:        cir.store{{.*}} %[[C_VPTR]], %[[BASE_VPTR_ADDR]]
 
@@ -419,27 +419,27 @@ D::D() {}
 // CIR-COMMON:        %[[C_VTT:.*]] = cir.vtt.address_point %[[VTT]] : !cir.ptr<!cir.ptr<!void>>, offset = 3 -> !cir.ptr<!cir.ptr<!void>>
 // CIR-COMMON:        cir.call @_ZN1CC2Ev(%[[C_ADDR]], %[[C_VTT]]) nothrow : (!cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!void>>) -> ()
 // CIR-COMMON:        %[[D_VTT:.*]] = cir.vtt.address_point %[[VTT]] : !cir.ptr<!cir.ptr<!void>>, offset = 0 -> !cir.ptr<!cir.ptr<!void>>
-// CIR-COMMON:        %[[VPTR_ADDR:.*]] = cir.cast(bitcast, %[[D_VTT]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!cir.vptr>
+// CIR-COMMON:        %[[VPTR_ADDR:.*]] = cir.cast bitcast %[[D_VTT]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
 // CIR-COMMON:        %[[VPTR:.*]] = cir.load{{.*}} %[[VPTR_ADDR]] : !cir.ptr<!cir.vptr>, !cir.vptr
 // CIR-COMMON:        %[[D_VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[THIS]]
 // CIR-COMMON:        cir.store{{.*}} %[[VPTR]], %[[D_VPTR_ADDR]]
 // CIR-COMMON:        %[[D_VTT_ADDR_POINT:.*]] = cir.vtt.address_point %[[VTT]] : !cir.ptr<!cir.ptr<!void>>, offset = 5 -> !cir.ptr<!cir.ptr<!void>>
-// CIR-COMMON:        %[[D_VPTR_ADDR:.*]] = cir.cast(bitcast, %[[D_VTT_ADDR_POINT]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!cir.vptr>
+// CIR-COMMON:        %[[D_VPTR_ADDR:.*]] = cir.cast bitcast %[[D_VTT_ADDR_POINT]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
 // CIR-COMMON:        %[[D_VPTR:.*]] = cir.load{{.*}} %[[D_VPTR_ADDR]] : !cir.ptr<!cir.vptr>, !cir.vptr
 // CIR-COMMON:        %[[D_VPTR_ADDR2:.*]] = cir.vtable.get_vptr %[[THIS]] : !cir.ptr<!rec_D> -> !cir.ptr<!cir.vptr>
 // CIR-COMMON:        %[[VPTR2:.*]] = cir.load{{.*}} %[[D_VPTR_ADDR2]] : !cir.ptr<!cir.vptr>, !cir.vptr
-// CIR-COMMON:        %[[VPTR_ADDR2:.*]] = cir.cast(bitcast, %[[VPTR2]] : !cir.vptr), !cir.ptr<!u8i>
+// CIR-COMMON:        %[[VPTR_ADDR2:.*]] = cir.cast bitcast %[[VPTR2]] : !cir.vptr -> !cir.ptr<!u8i>
 // CIR-COMMON:        %[[CONST_24:.*]] = cir.const #cir.int<-24> : !s64i
 // CIR-COMMON:        %[[BASE_OFFSET_ADDR:.*]] = cir.ptr_stride(%[[VPTR_ADDR2]] : !cir.ptr<!u8i>, %[[CONST_24]] : !s64i), !cir.ptr<!u8i>
-// CIR-COMMON:        %[[BASE_OFFSET_PTR:.*]] = cir.cast(bitcast, %[[BASE_OFFSET_ADDR]] : !cir.ptr<!u8i>), !cir.ptr<!s64i>
+// CIR-COMMON:        %[[BASE_OFFSET_PTR:.*]] = cir.cast bitcast %[[BASE_OFFSET_ADDR]] : !cir.ptr<!u8i> -> !cir.ptr<!s64i>
 // CIR-COMMON:        %[[BASE_OFFSET:.*]] = cir.load{{.*}} %[[BASE_OFFSET_PTR]] : !cir.ptr<!s64i>, !s64i
-// CIR-COMMON:        %[[THIS_PTR:.*]] = cir.cast(bitcast, %[[THIS]] : !cir.ptr<!rec_D>), !cir.ptr<!u8i>
+// CIR-COMMON:        %[[THIS_PTR:.*]] = cir.cast bitcast %[[THIS]] : !cir.ptr<!rec_D> -> !cir.ptr<!u8i>
 // CIR-COMMON:        %[[BASE_PTR:.*]] = cir.ptr_stride(%[[THIS_PTR]] : !cir.ptr<!u8i>, %[[BASE_OFFSET]] : !s64i), !cir.ptr<!u8i>
-// CIR-COMMON:        %[[BASE_CAST:.*]] = cir.cast(bitcast, %[[BASE_PTR]] : !cir.ptr<!u8i>), !cir.ptr<!rec_D>
+// CIR-COMMON:        %[[BASE_CAST:.*]] = cir.cast bitcast %[[BASE_PTR]] : !cir.ptr<!u8i> -> !cir.ptr<!rec_D>
 // CIR-COMMON:        %[[BASE_VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[BASE_CAST]]
 // CIR-COMMON:        cir.store{{.*}} %[[D_VPTR]], %[[BASE_VPTR_ADDR]]
 // CIR-COMMON:        %[[C_VTT_ADDR_POINT:.*]] = cir.vtt.address_point %[[VTT]] : !cir.ptr<!cir.ptr<!void>>, offset = 6 -> !cir.ptr<!cir.ptr<!void>>
-// CIR-COMMON:        %[[C_VPTR_ADDR:.*]] = cir.cast(bitcast, %[[C_VTT_ADDR_POINT]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!cir.vptr>
+// CIR-COMMON:        %[[C_VPTR_ADDR:.*]] = cir.cast bitcast %[[C_VTT_ADDR_POINT]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
 // CIR-COMMON:        %[[C_VPTR:.*]] = cir.load{{.*}} %[[C_VPTR_ADDR]] : !cir.ptr<!cir.vptr>, !cir.vptr
 // CIR-COMMON:        %[[C_ADDR:.*]] = cir.base_class_addr %[[THIS]] : !cir.ptr<!rec_D> nonnull [16] -> !cir.ptr<!rec_C>
 // CIR-COMMON:        %[[C_VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[C_ADDR]] : !cir.ptr<!rec_C> -> !cir.ptr<!cir.vptr>
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-copy.c b/clang/test/CIR/CodeGenOpenACC/combined-copy.c
index b4573e66f24a5..c1dc938912845 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-copy.c
+++ b/clang/test/CIR/CodeGenOpenACC/combined-copy.c
@@ -1090,7 +1090,7 @@ void copy_member_of_array_element_member() {
   for(int i = 0; i < 5; ++i);
   // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i
   // CHECK-NEXT: %[[GETINNER:.*]] = cir.get_member %[[OUTER]][0] {name = "inner"} : !cir.ptr<!rec_OuterTy> -> !cir.ptr<!cir.array<!rec_InnerTy x 4>>
-  // CHECK-NEXT: %[[INNERDECAY:.*]] = cir.cast(array_to_ptrdecay, %[[GETINNER]] : !cir.ptr<!cir.array<!rec_InnerTy x 4>>), !cir.ptr<!rec_InnerTy>
+  // CHECK-NEXT: %[[INNERDECAY:.*]] = cir.cast array_to_ptrdecay %[[GETINNER]] : !cir.ptr<!cir.array<!rec_InnerTy x 4>> -> !cir.ptr<!rec_InnerTy>
   // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[INNERDECAY]] : !cir.ptr<!rec_InnerTy>, %[[TWO]] : !s32i), !cir.ptr<!rec_InnerTy>
   // CHECK-NEXT: %[[GETB:.*]] = cir.get_member %[[STRIDE]][1] {name = "b"} : !cir.ptr<!rec_InnerTy> -> !cir.ptr<!s32i>
   // CHECK-NEXT:  %[[COPYIN1:.*]] = acc.copyin varPtr(%[[GETB]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, name = "outer.inner[2].b"}
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp b/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp
index 57e70df957ae6..e836a37a9bccd 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp
@@ -87,9 +87,9 @@ struct HasDtor {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!s32i>, %[[ZERO]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !s32i, !cir.ptr<!s32i>
@@ -97,7 +97,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[ONE]] : !s64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ONE_2]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
@@ -105,7 +105,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[TWO]] : !s64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[TWO_2]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
@@ -113,7 +113,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[THREE]] : !s64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[THREE_2]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
@@ -121,7 +121,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[FOUR]] : !s64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[FOUR_2]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
@@ -134,9 +134,9 @@ struct HasDtor {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!cir.float>, %[[ZERO]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !cir.float, !cir.ptr<!cir.float>
@@ -144,7 +144,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[ONE]] : !s64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ONE_2]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
@@ -152,7 +152,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[TWO]] : !s64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[TWO_2]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
@@ -160,7 +160,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[THREE]] : !s64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[THREE_2]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
@@ -168,7 +168,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[FOUR]] : !s64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[FOUR_2]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
@@ -181,37 +181,37 @@ struct HasDtor {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
 //
@@ -224,37 +224,37 @@ struct HasDtor {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
 //
@@ -267,37 +267,37 @@ struct HasDtor {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
 //
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ONE]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
 //
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[TWO]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
 //
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[THREE]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
 //
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
 //
@@ -310,37 +310,37 @@ struct HasDtor {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor> 
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> 
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ZERO]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
 //
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[ONE]] : !s64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
 //
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[TWO]] : !s64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
 //
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[THREE]] : !s64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
 //
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[FOUR]] : !s64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
 //
@@ -349,7 +349,7 @@ struct HasDtor {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}):
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<4> : !u64i
-// CHECK-NEXT: %[[ARRPTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[ARRPTR:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[ELEM:.*]] = cir.ptr_stride(%[[ARRPTR]] : !cir.ptr<!rec_HasDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[ITR:.*]] = cir.alloca !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>, ["__array_idx"]
 // CHECK-NEXT: cir.store %[[ELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp b/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp
index 639320275ab0f..10f4482fee54f 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp
@@ -125,7 +125,7 @@ struct HasDtor {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> ()
 // CHECK-NEXT: cir.yield
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp
index 8cce119bc847e..3d295d58d1026 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp
@@ -263,7 +263,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
@@ -305,7 +305,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -407,7 +407,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -509,7 +509,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LARGEST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -611,7 +611,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -714,7 +714,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
@@ -758,7 +758,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
@@ -800,7 +800,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -903,7 +903,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp
index 2265a9a7744a4..be33afe07e363 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp
@@ -131,7 +131,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
@@ -160,7 +160,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.fp<1{{.*}}> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -191,7 +191,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.fp<-3.4{{.*}}E+38> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[LEAST]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -222,7 +222,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.fp<3.4{{.*}}E+38> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[LARGEST]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -253,7 +253,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -285,7 +285,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
@@ -315,7 +315,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
@@ -344,7 +344,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.fp<1{{.*}}> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -376,7 +376,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp
index a2b9d4015aeb3..f13d96d171123 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp
@@ -310,7 +310,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsInline>
@@ -349,7 +349,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -372,7 +372,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -471,7 +471,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -494,7 +494,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -593,7 +593,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -616,7 +616,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LARGEST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -715,7 +715,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -738,7 +738,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -837,7 +837,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -861,7 +861,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsInline>
@@ -901,7 +901,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -925,7 +925,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsInline>
@@ -964,7 +964,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -988,7 +988,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -1087,7 +1087,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -1111,7 +1111,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsInline>
@@ -1151,7 +1151,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp
index e7caf83e9b862..952fee9b1ac1a 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp
@@ -134,7 +134,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
@@ -163,7 +163,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -194,7 +194,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[LEAST]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -225,7 +225,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[LARGEST]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -256,7 +256,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -288,7 +288,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
@@ -318,7 +318,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
@@ -347,7 +347,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -379,7 +379,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp
index bf9aa0ad59d60..15646ed87b284 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp
@@ -310,7 +310,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsOutline>
@@ -349,7 +349,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -372,7 +372,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -471,7 +471,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -494,7 +494,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -593,7 +593,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -616,7 +616,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LARGEST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -715,7 +715,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -738,7 +738,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -837,7 +837,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -861,7 +861,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsOutline>
@@ -901,7 +901,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -925,7 +925,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsOutline>
@@ -964,7 +964,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -987,7 +987,7 @@ void acc_combined() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -1086,7 +1086,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -1111,7 +1111,7 @@ void acc_combined() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsOutline>
@@ -1151,7 +1151,7 @@ void acc_combined() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp
index b8140335f7c29..98f2ffd2cb12a 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp
@@ -191,7 +191,7 @@ extern "C" void acc_combined(int N, int cond) {
 #pragma acc serial loop self(N)
   for(unsigned I = 0; I < N; ++I);
   // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[N_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[N_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.serial combined(loop) self(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.loop combined(serial) {
@@ -203,7 +203,7 @@ extern "C" void acc_combined(int N, int cond) {
 #pragma acc parallel loop if(N)
   for(unsigned I = 0; I < N; ++I);
   // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[N_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[N_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.parallel combined(loop) if(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.loop combined(parallel) {
@@ -215,7 +215,7 @@ extern "C" void acc_combined(int N, int cond) {
 #pragma acc serial loop if(1)
   for(unsigned I = 0; I < N; ++I);
   // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ONE_LITERAL]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[ONE_LITERAL]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.serial combined(loop) if(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.loop combined(serial) {
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c
index 947b281bfd9db..de6e7b0314fa9 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c
+++ b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c
@@ -40,9 +40,9 @@ struct NoCopyConstruct {};
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!s32i>, %[[ZERO]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !s32i, !cir.ptr<!s32i>
@@ -50,7 +50,7 @@ struct NoCopyConstruct {};
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[ONE]] : !s64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ONE_2]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
@@ -58,7 +58,7 @@ struct NoCopyConstruct {};
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[TWO]] : !s64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[TWO_2]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
@@ -66,7 +66,7 @@ struct NoCopyConstruct {};
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[THREE]] : !s64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[THREE_2]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
@@ -74,7 +74,7 @@ struct NoCopyConstruct {};
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[FOUR]] : !s64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[FOUR_2]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
@@ -87,9 +87,9 @@ struct NoCopyConstruct {};
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!cir.float>, %[[ZERO]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !cir.float, !cir.ptr<!cir.float>
@@ -97,7 +97,7 @@ struct NoCopyConstruct {};
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[ONE]] : !s64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ONE_2]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
@@ -105,7 +105,7 @@ struct NoCopyConstruct {};
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[TWO]] : !s64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[TWO_2]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
@@ -113,7 +113,7 @@ struct NoCopyConstruct {};
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[THREE]] : !s64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[THREE_2]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
@@ -121,7 +121,7 @@ struct NoCopyConstruct {};
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[FOUR]] : !s64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[FOUR_2]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
@@ -134,37 +134,37 @@ struct NoCopyConstruct {};
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.copy %[[FROM_OFFSET:.*]] to %[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>
 //
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.copy %[[FROM_OFFSET]] to %[[TO_OFFSET]] : !cir.ptr<!rec_NoCopyConstruct>
 //
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.copy %[[FROM_OFFSET]] to %[[TO_OFFSET]] : !cir.ptr<!rec_NoCopyConstruct>
 //
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.copy %[[FROM_OFFSET]] to %[[TO_OFFSET]] : !cir.ptr<!rec_NoCopyConstruct>
 //
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.copy %[[FROM_OFFSET]] to %[[TO_OFFSET]] : !cir.ptr<!rec_NoCopyConstruct>
 //
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp
index 49fd78cb385e6..fca3ca85c9edf 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp
@@ -87,9 +87,9 @@ struct HasDtor {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!s32i>, %[[ZERO]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !s32i, !cir.ptr<!s32i>
@@ -97,7 +97,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[ONE]] : !s64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[ONE_2]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
@@ -105,7 +105,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[TWO]] : !s64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[TWO_2]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
@@ -113,7 +113,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[THREE]] : !s64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[THREE_2]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
@@ -121,7 +121,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!s32i>, %[[FOUR]] : !s64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!s32i>, %[[FOUR_2]] : !u64i), !cir.ptr<!s32i>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!s32i>, !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !s32i, !cir.ptr<!s32i>
@@ -134,9 +134,9 @@ struct HasDtor {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!cir.float>, %[[ZERO]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_DECAY]] : !cir.float, !cir.ptr<!cir.float>
@@ -144,7 +144,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[ONE]] : !s64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[ONE_2]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
@@ -152,7 +152,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[TWO]] : !s64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[TWO_2]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
@@ -160,7 +160,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[THREE]] : !s64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[THREE_2]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
@@ -168,7 +168,7 @@ struct HasDtor {
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!cir.float>, %[[FOUR]] : !s64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!cir.float>, %[[FOUR_2]] : !u64i), !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[FROM_LOAD:.*]] = cir.load {{.*}}%[[FROM_OFFSET]] : !cir.ptr<!cir.float>, !cir.float
 // CHECK-NEXT: cir.store {{.*}} %[[FROM_LOAD]], %[[TO_OFFSET]] : !cir.float, !cir.ptr<!cir.float>
@@ -181,37 +181,37 @@ struct HasDtor {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>), !cir.ptr<!rec_NoCopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> -> !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NoCopyConstruct>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_NoCopyConstruct>
 // CHECK-NEXT: cir.call @_ZN15NoCopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NoCopyConstruct>, !cir.ptr<!rec_NoCopyConstruct>) -> ()
 //
@@ -224,37 +224,37 @@ struct HasDtor {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[ZERO]] : !u64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[ONE]] : !s64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[TWO]] : !s64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[THREE]] : !s64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
 //
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_CopyConstruct>, %[[FOUR]] : !s64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>), !cir.ptr<!rec_CopyConstruct>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> -> !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_CopyConstruct>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_CopyConstruct>
 // CHECK-NEXT: cir.call @_ZN13CopyConstructC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) : (!cir.ptr<!rec_CopyConstruct>, !cir.ptr<!rec_CopyConstruct>) -> ()
 //
@@ -267,37 +267,37 @@ struct HasDtor {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[FROM_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[FROM_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ZERO]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
 //
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ONE]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
 //
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[TWO]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
 //
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[THREE]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
 //
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_NonDefaultCtor>, %[[FOUR]] : !s64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>), !cir.ptr<!rec_NonDefaultCtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> -> !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_NonDefaultCtor>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_NonDefaultCtor>
 // CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_NonDefaultCtor>, !cir.ptr<!rec_NonDefaultCtor>) -> ()
 //
@@ -310,37 +310,37 @@ struct HasDtor {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT: } copy {
 // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}):
-// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor> 
+// CHECK-NEXT: %[[TO_DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG_TO]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor> 
 // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ZERO]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_DECAY]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
 //
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[ONE]] : !s64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[ONE_2:.*]] = cir.const #cir.int<1>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[ONE_2]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
 //
 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[TWO]] : !s64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[TWO_2:.*]] = cir.const #cir.int<2>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[TWO_2]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
 //
 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[THREE]] : !s64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[THREE_2:.*]] = cir.const #cir.int<3>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[THREE_2]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
 //
 // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4>
 // CHECK-NEXT: %[[TO_OFFSET:.*]] = cir.ptr_stride(%[[TO_DECAY]] : !cir.ptr<!rec_HasDtor>, %[[FOUR]] : !s64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[FOUR_2:.*]] = cir.const #cir.int<4>
-// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast(array_to_ptrdecay, %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY_FROM:.*]] =  cir.cast array_to_ptrdecay %[[ARG_FROM]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[FROM_OFFSET:.*]] = cir.ptr_stride(%[[DECAY_FROM]] : !cir.ptr<!rec_HasDtor>, %[[FOUR_2]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: cir.call @_ZN7HasDtorC1ERKS_(%[[TO_OFFSET]], %[[FROM_OFFSET]]) nothrow : (!cir.ptr<!rec_HasDtor>, !cir.ptr<!rec_HasDtor>) -> ()
 //
@@ -349,7 +349,7 @@ struct HasDtor {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}):
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<4> : !u64i
-// CHECK-NEXT: %[[ARRPTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[ARRPTR:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[ELEM:.*]] = cir.ptr_stride(%[[ARRPTR]] : !cir.ptr<!rec_HasDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[ITR:.*]] = cir.alloca !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>, ["__array_idx"]
 // CHECK-NEXT: cir.store %[[ELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp b/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp
index 97399d9d4620e..d8542225b45fd 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp
@@ -113,7 +113,7 @@ struct HasDtor {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> ()
 // CHECK-NEXT: cir.yield
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c
index fff72dcbdd204..e357f440eb4c3 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c
@@ -260,7 +260,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
@@ -289,7 +289,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -392,7 +392,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -494,7 +494,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LARGEST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -596,7 +596,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -699,7 +699,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
@@ -729,7 +729,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
@@ -758,7 +758,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -861,7 +861,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp
index c5b45f2d2efe1..e0098bc625459 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp
@@ -263,7 +263,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
@@ -305,7 +305,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -407,7 +407,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -509,7 +509,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LARGEST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -611,7 +611,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -714,7 +714,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
@@ -758,7 +758,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
@@ -800,7 +800,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -903,7 +903,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c
index 5b0dcadece4f8..5336fadc9fd0c 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c
@@ -131,7 +131,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
@@ -160,7 +160,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.fp<1{{.*}}> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -191,7 +191,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.fp<-3.4{{.*}}E+38> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[LEAST]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -222,7 +222,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.fp<3.4{{.*}}E+38> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[LARGEST]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -253,7 +253,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -285,7 +285,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
@@ -315,7 +315,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
@@ -344,7 +344,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.fp<1{{.*}}> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -376,7 +376,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp
index 35a79d15d7756..a51388203a3d8 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp
@@ -132,7 +132,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
@@ -161,7 +161,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.fp<1{{.*}}> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -192,7 +192,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.fp<-3.4{{.*}}E+38> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[LEAST]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -223,7 +223,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.fp<3.4{{.*}}E+38> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[LARGEST]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -254,7 +254,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -286,7 +286,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
@@ -316,7 +316,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
@@ -345,7 +345,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.fp<1{{.*}}> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -377,7 +377,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp
index 1844440a47857..1968c0ac740dd 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp
@@ -310,7 +310,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsInline>
@@ -349,7 +349,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -372,7 +372,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -471,7 +471,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -494,7 +494,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -593,7 +593,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -616,7 +616,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LARGEST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -715,7 +715,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -738,7 +738,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -837,7 +837,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -861,7 +861,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsInline>
@@ -901,7 +901,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -925,7 +925,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsInline>
@@ -964,7 +964,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -988,7 +988,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -1087,7 +1087,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -1111,7 +1111,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsInline>
@@ -1151,7 +1151,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c
index 363e88502e815..f63e340b29aa7 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c
@@ -132,7 +132,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
@@ -161,7 +161,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -192,7 +192,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[LEAST]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -223,7 +223,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[LARGEST]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -254,7 +254,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -286,7 +286,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
@@ -316,7 +316,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
@@ -345,7 +345,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -377,7 +377,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp
index a4320e6db5f7a..48e5ac94627f5 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp
@@ -134,7 +134,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
@@ -163,7 +163,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -194,7 +194,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[LEAST]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -225,7 +225,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[LARGEST]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -256,7 +256,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -288,7 +288,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
@@ -318,7 +318,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
@@ -347,7 +347,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -379,7 +379,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp
index b56c1698e9eeb..6d204bc9060b0 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp
@@ -310,7 +310,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsOutline>
@@ -349,7 +349,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -372,7 +372,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -471,7 +471,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -494,7 +494,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -593,7 +593,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -616,7 +616,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LARGEST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -715,7 +715,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -738,7 +738,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -837,7 +837,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -861,7 +861,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsOutline>
@@ -901,7 +901,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -925,7 +925,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsOutline>
@@ -964,7 +964,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -987,7 +987,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -1086,7 +1086,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -1111,7 +1111,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsOutline>
@@ -1151,7 +1151,7 @@ void acc_compute() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c
index 0e815b7c7059a..35a7e7a951f74 100644
--- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c
+++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c
@@ -132,7 +132,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!u32i x 5>, !cir.ptr<!cir.array<!u32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!u32i>, !cir.ptr<!cir.ptr<!u32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>>), !cir.ptr<!u32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!u32i>, !cir.ptr<!cir.ptr<!u32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!u32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!u32i>
@@ -161,7 +161,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_j : !cir.ptr<!cir.array<!u32i x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!u32i x 5>, !cir.ptr<!cir.array<!u32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>>), !cir.ptr<!u32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u32i
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !u32i, !cir.ptr<!u32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -192,7 +192,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_j : !cir.ptr<!cir.array<!u32i x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!u32i x 5>, !cir.ptr<!cir.array<!u32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>>), !cir.ptr<!u32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<0> : !u32i
 // CHECK-NEXT: cir.store{{.*}} %[[LEAST]], %[[DECAY]] : !u32i, !cir.ptr<!u32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -223,7 +223,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_j : !cir.ptr<!cir.array<!u32i x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!u32i x 5>, !cir.ptr<!cir.array<!u32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>>), !cir.ptr<!u32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<4294967295> : !u32i
 // CHECK-NEXT: cir.store{{.*}} %[[LARGEST]], %[[DECAY]] : !u32i, !cir.ptr<!u32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -254,7 +254,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_j : !cir.ptr<!cir.array<!u32i x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!u32i x 5>, !cir.ptr<!cir.array<!u32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>>), !cir.ptr<!u32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i
 // CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[DECAY]] : !u32i, !cir.ptr<!u32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -286,7 +286,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!u32i x 5>, !cir.ptr<!cir.array<!u32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!u32i>, !cir.ptr<!cir.ptr<!u32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>>), !cir.ptr<!u32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!u32i>, !cir.ptr<!cir.ptr<!u32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!u32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!u32i>
@@ -316,7 +316,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!u32i x 5>, !cir.ptr<!cir.array<!u32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!u32i>, !cir.ptr<!cir.ptr<!u32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>>), !cir.ptr<!u32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!u32i>, !cir.ptr<!cir.ptr<!u32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!u32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!u32i>
@@ -345,7 +345,7 @@ void acc_compute() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_j : !cir.ptr<!cir.array<!u32i x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!u32i x 5>, !cir.ptr<!cir.array<!u32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>>), !cir.ptr<!u32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u32i
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !u32i, !cir.ptr<!u32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -377,7 +377,7 @@ void acc_compute() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!u32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!u32i x 5>, !cir.ptr<!cir.array<!u32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!u32i>, !cir.ptr<!cir.ptr<!u32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>>), !cir.ptr<!u32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!u32i x 5>> -> !cir.ptr<!u32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!u32i>, !cir.ptr<!cir.ptr<!u32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!u32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!u32i>
diff --git a/clang/test/CIR/CodeGenOpenACC/data.c b/clang/test/CIR/CodeGenOpenACC/data.c
index 1f6a76ce1ea7c..4e13f17f4bfd7 100644
--- a/clang/test/CIR/CodeGenOpenACC/data.c
+++ b/clang/test/CIR/CodeGenOpenACC/data.c
@@ -87,7 +87,7 @@ void acc_data(int cond) {
 #pragma acc data default(none) if(cond)
   {}
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.data if(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.terminator
@@ -96,7 +96,7 @@ void acc_data(int cond) {
 #pragma acc data default(none) if(1)
   {}
   // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ONE_LITERAL]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[ONE_LITERAL]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.data if(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.terminator
diff --git a/clang/test/CIR/CodeGenOpenACC/host_data.c b/clang/test/CIR/CodeGenOpenACC/host_data.c
index fa06d2a1cbd26..bcfa175f4e525 100644
--- a/clang/test/CIR/CodeGenOpenACC/host_data.c
+++ b/clang/test/CIR/CodeGenOpenACC/host_data.c
@@ -38,7 +38,7 @@ void acc_host_data(int cond, int var1, int var2, int *arr) {
   // CHECK-NEXT: %[[USE_DEV1:.*]] = acc.use_device varPtr(%[[V1]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "var1"}
   // CHECK-NEXT: %[[USE_DEV2:.*]] = acc.use_device varPtr(%[[V2]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "var2"}
   // CHECK-NEXT: %[[LOAD_COND:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[COND_BOOL:.*]] = cir.cast(int_to_bool, %[[LOAD_COND]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[COND_BOOL:.*]] = cir.cast int_to_bool %[[LOAD_COND]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[COND_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_BOOL]] : !cir.bool to i1
   // CHECK-NEXT: acc.host_data if(%[[COND_CAST]]) dataOperands(%[[USE_DEV1]], %[[USE_DEV2]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>) {
   // CHECK-NEXT: acc.terminator
@@ -49,7 +49,7 @@ void acc_host_data(int cond, int var1, int var2, int *arr) {
   // CHECK-NEXT: %[[USE_DEV1:.*]] = acc.use_device varPtr(%[[V1]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "var1"}
   // CHECK-NEXT: %[[USE_DEV2:.*]] = acc.use_device varPtr(%[[V2]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "var2"}
   // CHECK-NEXT: %[[LOAD_COND:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[COND_BOOL:.*]] = cir.cast(int_to_bool, %[[LOAD_COND]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[COND_BOOL:.*]] = cir.cast int_to_bool %[[LOAD_COND]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[COND_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_BOOL]] : !cir.bool to i1
   // CHECK-NEXT: acc.host_data if(%[[COND_CAST]]) dataOperands(%[[USE_DEV1]], %[[USE_DEV2]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>) {
   // CHECK-NEXT: acc.terminator
diff --git a/clang/test/CIR/CodeGenOpenACC/init.c b/clang/test/CIR/CodeGenOpenACC/init.c
index 805fb08dbf487..829850f2c82d6 100644
--- a/clang/test/CIR/CodeGenOpenACC/init.c
+++ b/clang/test/CIR/CodeGenOpenACC/init.c
@@ -18,13 +18,13 @@ void acc_init(int cond) {
 
 #pragma acc init if(cond)
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.init if(%[[BOOL_CONV]])
 
 #pragma acc init if(1)
   // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
-  // CHECK-NEXT: %[[ONE_TO_BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ONE_LITERAL]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[ONE_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ONE_LITERAL]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[ONE_TO_BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.init if(%[[BOOL_CONV]])
 
@@ -40,7 +40,7 @@ void acc_init(int cond) {
 
 #pragma acc init if(cond) device_num(cond) device_type(*)
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
   // CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
diff --git a/clang/test/CIR/CodeGenOpenACC/kernels.c b/clang/test/CIR/CodeGenOpenACC/kernels.c
index 9b10b7489e814..9f33e54a345b1 100644
--- a/clang/test/CIR/CodeGenOpenACC/kernels.c
+++ b/clang/test/CIR/CodeGenOpenACC/kernels.c
@@ -29,7 +29,7 @@ void acc_kernels(int cond) {
   // CHECK-NEXT: cir.scope {
   // CHECK-NEXT: cir.while {
   // CHECK-NEXT: %[[INT:.*]] = cir.const #cir.int<1>
-  // CHECK-NEXT: %[[CAST:.*]] = cir.cast(int_to_bool, %[[INT]] :
+  // CHECK-NEXT: %[[CAST:.*]] = cir.cast int_to_bool %[[INT]]
   // CHECK-NEXT: cir.condition(%[[CAST]])
   // CHECK-NEXT: } do {
   // CHECK-NEXT: cir.yield
@@ -49,7 +49,7 @@ void acc_kernels(int cond) {
 #pragma acc kernels self(cond)
   {}
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.kernels self(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.terminator
@@ -58,7 +58,7 @@ void acc_kernels(int cond) {
 #pragma acc kernels self(0)
   {}
   // CHECK-NEXT: %[[ZERO_LITERAL:.*]] = cir.const #cir.int<0> : !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ZERO_LITERAL]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[ZERO_LITERAL]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.kernels self(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.terminator
@@ -67,7 +67,7 @@ void acc_kernels(int cond) {
 #pragma acc kernels if(cond)
   {}
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.kernels if(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.terminator
@@ -76,7 +76,7 @@ void acc_kernels(int cond) {
 #pragma acc kernels if(1)
   {}
   // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ONE_LITERAL]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[ONE_LITERAL]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.kernels if(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.terminator
diff --git a/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp b/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp
index d4fd4ccc68f7a..b356f0fb26cc4 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp
@@ -125,7 +125,7 @@ struct HasDtor {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>> -> !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasDtor>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_HasDtor>
 // CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> ()
 // CHECK-NEXT: cir.yield
diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp
index 7130a2bdccdcc..73b8fe27c6aa1 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp
@@ -263,7 +263,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
@@ -305,7 +305,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -407,7 +407,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -509,7 +509,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LARGEST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -611,7 +611,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -714,7 +714,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
@@ -758,7 +758,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
@@ -800,7 +800,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -903,7 +903,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_DefaultOperators>, !cir.ptr<!cir.ptr<!rec_DefaultOperators>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_DefaultOperators>
diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp
index e549104e0fedb..77c61382c06bf 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp
@@ -132,7 +132,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
@@ -161,7 +161,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.fp<1{{.*}}> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -192,7 +192,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.fp<-3.4{{.*}}E+38> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[LEAST]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -223,7 +223,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.fp<3.4{{.*}}E+38> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[LARGEST]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -254,7 +254,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.fp<0xF{{.*}}> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -286,7 +286,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
@@ -316,7 +316,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
@@ -345,7 +345,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.fp<1{{.*}}> : !cir.float
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !cir.float, !cir.ptr<!cir.float>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -377,7 +377,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>>), !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!cir.float x 5>> -> !cir.ptr<!cir.float>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!cir.float>, !cir.ptr<!cir.ptr<!cir.float>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!cir.float>, %[[LAST_IDX]] : !s64i), !cir.ptr<!cir.float>
diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp
index c2ece70bae7a2..6ca0654b0384d 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp
@@ -310,7 +310,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsInline>
@@ -349,7 +349,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -372,7 +372,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -471,7 +471,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -494,7 +494,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -593,7 +593,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -616,7 +616,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LARGEST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -715,7 +715,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -738,7 +738,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -837,7 +837,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -861,7 +861,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsInline>
@@ -901,7 +901,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -925,7 +925,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsInline>
@@ -964,7 +964,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -988,7 +988,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_18HasOperatorsInline : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsInline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -1087,7 +1087,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
@@ -1111,7 +1111,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsInline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsInline>
@@ -1151,7 +1151,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>>), !cir.ptr<!rec_HasOperatorsInline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsInline x 5>> -> !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsInline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsInline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsInline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsInline>>
diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp
index f9169df97f1e6..dd3c54fa8f023 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp
@@ -134,7 +134,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
@@ -163,7 +163,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -194,7 +194,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[LEAST]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -225,7 +225,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[LARGEST]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -256,7 +256,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[ALL_ONES]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -288,7 +288,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
@@ -318,7 +318,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
@@ -347,7 +347,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store{{.*}} %[[ONE]], %[[DECAY]] : !s32i, !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE_IDX:.*]] = cir.const #cir.int<1> : !s64i
@@ -379,7 +379,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>>), !cir.ptr<!s32i>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!s32i>, %[[LAST_IDX]] : !s64i), !cir.ptr<!s32i>
diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp
index a3bf17356b81c..d36f9c608920e 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp
@@ -310,7 +310,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsOutline>
@@ -349,7 +349,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -372,7 +372,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_mul__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <mul> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -471,7 +471,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -494,7 +494,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_max__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <max> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -593,7 +593,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -616,7 +616,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_min__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <min> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[LARGEST:.*]] = cir.const #cir.int<2147483647> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[LARGEST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -715,7 +715,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -738,7 +738,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_iand__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <iand> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ALL_ONES:.*]] = cir.const #cir.int<-1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ALL_ONES]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -837,7 +837,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -861,7 +861,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsOutline>
@@ -901,7 +901,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -925,7 +925,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsOutline>
@@ -964,7 +964,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -987,7 +987,7 @@ void acc_loop() {
 // CHECK-NEXT: acc.reduction.recipe @reduction_land__ZTSA5_19HasOperatorsOutline : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> reduction_operator <land> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[DECAY]][0] {name = "i"} : !cir.ptr<!rec_HasOperatorsOutline> -> !cir.ptr<!s32i>
 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
 // CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
@@ -1086,7 +1086,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
@@ -1111,7 +1111,7 @@ void acc_loop() {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>{{.*}})
 // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_HasOperatorsOutline x 5>, !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>, ["openacc.reduction.init", init]
 // CHECK-NEXT: %[[TEMP_ITR:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["arrayinit.temp"]
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: cir.store {{.*}} %[[DECAY]], %[[TEMP_ITR]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<5> : !s64i
 // CHECK-NEXT: %[[END_ITR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[LAST_IDX]] : !s64i), !cir.ptr<!rec_HasOperatorsOutline>
@@ -1151,7 +1151,7 @@ void acc_loop() {
 // CHECK-NEXT: } destroy {
 // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}, %[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> {{.*}}):  
 // CHECK-NEXT: %[[SIZE:.*]] = cir.const #cir.int<4>  : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>>), !cir.ptr<!rec_HasOperatorsOutline>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ARG]] : !cir.ptr<!cir.array<!rec_HasOperatorsOutline x 5>> -> !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_HasOperatorsOutline>, %[[SIZE]] : !u64i), !cir.ptr<!rec_HasOperatorsOutline>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[IDX]] : !cir.ptr<!rec_HasOperatorsOutline>, !cir.ptr<!cir.ptr<!rec_HasOperatorsOutline>>
diff --git a/clang/test/CIR/CodeGenOpenACC/parallel.c b/clang/test/CIR/CodeGenOpenACC/parallel.c
index 5db174fb6549b..7080a8d5e579a 100644
--- a/clang/test/CIR/CodeGenOpenACC/parallel.c
+++ b/clang/test/CIR/CodeGenOpenACC/parallel.c
@@ -28,7 +28,7 @@ void acc_parallel(int cond) {
   // CHECK-NEXT: cir.scope {
   // CHECK-NEXT: cir.while {
   // CHECK-NEXT: %[[INT:.*]] = cir.const #cir.int<1>
-  // CHECK-NEXT: %[[CAST:.*]] = cir.cast(int_to_bool, %[[INT]] :
+  // CHECK-NEXT: %[[CAST:.*]] = cir.cast int_to_bool %[[INT]]
   // CHECK-NEXT: cir.condition(%[[CAST]])
   // CHECK-NEXT: } do {
   // CHECK-NEXT: cir.yield
@@ -48,7 +48,7 @@ void acc_parallel(int cond) {
 #pragma acc parallel self(cond)
   {}
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.parallel self(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.yield
@@ -57,7 +57,7 @@ void acc_parallel(int cond) {
 #pragma acc parallel self(0)
   {}
   // CHECK-NEXT: %[[ZERO_LITERAL:.*]] = cir.const #cir.int<0> : !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ZERO_LITERAL]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[ZERO_LITERAL]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.parallel self(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.yield
@@ -66,7 +66,7 @@ void acc_parallel(int cond) {
 #pragma acc parallel if(cond)
   {}
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.parallel if(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.yield
@@ -75,7 +75,7 @@ void acc_parallel(int cond) {
 #pragma acc parallel if(1)
   {}
   // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ONE_LITERAL]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[ONE_LITERAL]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.parallel if(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.yield
diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-CtorDtor.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-CtorDtor.cpp
index c62ebe26584b8..30a14ac836e8e 100644
--- a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-CtorDtor.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-CtorDtor.cpp
@@ -34,7 +34,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: cir.call @_ZN8CtorDtorD1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_CtorDtor>) -> ()
 // CHECK-NEXT: cir.yield
@@ -55,7 +55,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CtorDtor x 5>> {{.*}}):
 // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!rec_CtorDtor x 5>, !cir.ptr<!cir.array<!rec_CtorDtor x 5>>, ["openacc.private.init", init] {alignment = 16 : i64}
 // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<5> : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[ONE_PAST_LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ARR_SIZE]] : !u64i), !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[ARR_IDX:.*]] = cir.alloca !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[DECAY]], %[[ARR_IDX]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
@@ -75,7 +75,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT:} destroy {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CtorDtor x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!rec_CtorDtor x 5>> {{.*}}):
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<4> : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[ARR_IDX:.*]] = cir.alloca !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[ARR_IDX]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
@@ -120,7 +120,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: } body {
 //
 // CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>), !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> -> !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
 // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
 //
 // CHECK-NEXT: cir.scope {
@@ -139,7 +139,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: cir.call @_ZN8CtorDtorD1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_CtorDtor>) -> ()
 // CHECK-NEXT: cir.yield
@@ -169,9 +169,9 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_A5_8CtorDtor : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> {{.*}}):
 // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!rec_CtorDtor x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>, ["openacc.private.init", init] {alignment = 16 : i64}
-// CHECK-NEXT: %[[BITCAST:.*]] = cir.cast(bitcast, %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>), !cir.ptr<!cir.array<!rec_CtorDtor x 25>>
+// CHECK-NEXT: %[[BITCAST:.*]] = cir.cast bitcast %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> -> !cir.ptr<!cir.array<!rec_CtorDtor x 25>>
 // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<25> : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[BITCAST]] : !cir.ptr<!cir.array<!rec_CtorDtor x 25>>), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[BITCAST]] : !cir.ptr<!cir.array<!rec_CtorDtor x 25>> -> !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[ONE_PAST_LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ARR_SIZE]] : !u64i), !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[ARR_IDX:.*]] = cir.alloca !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[DECAY]], %[[ARR_IDX]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
@@ -190,9 +190,9 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT:} destroy {
 // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> {{.*}}):
-// CHECK-NEXT: %[[BITCAST:.*]] = cir.cast(bitcast, %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>), !cir.ptr<!cir.array<!rec_CtorDtor x 25>>
+// CHECK-NEXT: %[[BITCAST:.*]] = cir.cast bitcast %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> -> !cir.ptr<!cir.array<!rec_CtorDtor x 25>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<24> : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[BITCAST]] : !cir.ptr<!cir.array<!rec_CtorDtor x 25>>), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[BITCAST]] : !cir.ptr<!cir.array<!rec_CtorDtor x 25>> -> !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[ARR_IDX:.*]] = cir.alloca !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[ARR_IDX]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
@@ -236,7 +236,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>>), !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>
 // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>
 // CHECK-NEXT: cir.scope {
 // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
@@ -253,7 +253,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>), !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> -> !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
 // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
 // CHECK-NEXT: cir.scope {
 // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
@@ -270,7 +270,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[BOUND1_STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: cir.call @_ZN8CtorDtorD1Ev(%[[BOUND1_STRIDE]]) nothrow : (!cir.ptr<!rec_CtorDtor>) -> ()
 // CHECK-NEXT: cir.yield
@@ -330,7 +330,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: } body {
 //
 // CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>>), !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>
 // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>
 //
 // CHECK-NEXT: cir.scope {
@@ -349,10 +349,10 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>>), !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5>> -> !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
 // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<4> : !u64i
-// CHECK-NEXT: %[[ARR_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[ARR_DECAY:.*]] = cir.cast array_to_ptrdecay %[[STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[ARR_DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[ARR_IDX:.*]] = cir.alloca !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[ARR_IDX]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
@@ -395,9 +395,9 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_A5_A5_8CtorDtor : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> {{.*}}):
 // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>>, ["openacc.private.init", init] {alignment = 16 : i64}
-// CHECK-NEXT: %[[BITCAST:.*]] = cir.cast(bitcast, %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>>), !cir.ptr<!cir.array<!rec_CtorDtor x 125>>
+// CHECK-NEXT: %[[BITCAST:.*]] = cir.cast bitcast %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!rec_CtorDtor x 125>>
 // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<125> : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[BITCAST]] : !cir.ptr<!cir.array<!rec_CtorDtor x 125>>), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[BITCAST]] : !cir.ptr<!cir.array<!rec_CtorDtor x 125>> -> !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[ONE_PAST_LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ARR_SIZE]] : !u64i), !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[ARR_IDX:.*]] = cir.alloca !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[DECAY]], %[[ARR_IDX]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
@@ -416,9 +416,9 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: acc.yield
 // CHECK-NEXT:} destroy {
 // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> {{.*}})):
-// CHECK-NEXT: %[[BITCAST:.*]] = cir.cast(bitcast, %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>>), !cir.ptr<!cir.array<!rec_CtorDtor x 125>>
+// CHECK-NEXT: %[[BITCAST:.*]] = cir.cast bitcast %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_CtorDtor x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!rec_CtorDtor x 125>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<124> : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[BITCAST]] : !cir.ptr<!cir.array<!rec_CtorDtor x 125>>), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[BITCAST]] : !cir.ptr<!cir.array<!rec_CtorDtor x 125>> -> !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[ARR_IDX:.*]] = cir.alloca !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[LAST_ELT]], %[[ARR_IDX]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>
diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-NoOps.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-NoOps.cpp
index 38df8133a38c0..753389f2a3f47 100644
--- a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-NoOps.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-NoOps.cpp
@@ -20,7 +20,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: ^bb0(%arg0: !cir.ptr<!cir.array<!rec_NoOps x 5>> {{.*}}):
 // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!rec_NoOps x 5>, !cir.ptr<!cir.array<!rec_NoOps x 5>>, ["openacc.private.init", init] {alignment = 16 : i64}
 // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<5> : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!rec_NoOps x 5>>), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!rec_NoOps x 5>> -> !cir.ptr<!rec_NoOps>
 // CHECK-NEXT: %[[ONE_PAST_LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_NoOps>, %[[ARR_SIZE]] : !u64i), !cir.ptr<!rec_NoOps>
 // CHECK-NEXT: %[[ARR_IDX:.*]] = cir.alloca !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[DECAY]], %[[ARR_IDX]] : !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>
@@ -57,9 +57,9 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_A5_5NoOps : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> init {
 // CHECK-NEXT: ^bb0(%arg0: !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> {{.*}}):
 // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!rec_NoOps x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>, ["openacc.private.init", init] {alignment = 16 : i64}
-// CHECK-NEXT: %[[BITCAST:.*]] = cir.cast(bitcast, %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>>), !cir.ptr<!cir.array<!rec_NoOps x 25>>
+// CHECK-NEXT: %[[BITCAST:.*]] = cir.cast bitcast %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!rec_NoOps x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 25>>
 // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<25> : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[BITCAST]] : !cir.ptr<!cir.array<!rec_NoOps x 25>>), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[BITCAST]] : !cir.ptr<!cir.array<!rec_NoOps x 25>> -> !cir.ptr<!rec_NoOps>
 // CHECK-NEXT: %[[ONE_PAST_LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_NoOps>, %[[ARR_SIZE]] : !u64i), !cir.ptr<!rec_NoOps>
 // CHECK-NEXT: %[[ARR_IDX:.*]] = cir.alloca !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[DECAY]], %[[ARR_IDX]] : !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>
@@ -110,9 +110,9 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_A5_A5_5NoOps : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> init {
 // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> {{.*}}):
 // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca !cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>, !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>>, ["openacc.private.init", init] {alignment = 16 : i64}
-// CHECK-NEXT: %[[BITCAST:.*]] = cir.cast(bitcast, %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>>), !cir.ptr<!cir.array<!rec_NoOps x 125>>
+// CHECK-NEXT: %[[BITCAST:.*]] = cir.cast bitcast %[[TL_ALLOCA]] : !cir.ptr<!cir.array<!cir.array<!cir.array<!rec_NoOps x 5> x 5> x 5>> -> !cir.ptr<!cir.array<!rec_NoOps x 125>>
 // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<125> : !u64i
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[BITCAST]] : !cir.ptr<!cir.array<!rec_NoOps x 125>>), !cir.ptr<!rec_NoOps>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[BITCAST]] : !cir.ptr<!cir.array<!rec_NoOps x 125>> -> !cir.ptr<!rec_NoOps>
 // CHECK-NEXT: %[[ONE_PAST_LAST_ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_NoOps>, %[[ARR_SIZE]] : !u64i), !cir.ptr<!rec_NoOps>
 // CHECK-NEXT: %[[ARR_IDX:.*]] = cir.alloca !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[DECAY]], %[[ARR_IDX]] : !cir.ptr<!rec_NoOps>, !cir.ptr<!cir.ptr<!rec_NoOps>>
diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-CtorDtor.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-CtorDtor.cpp
index 52bcd7cd539f2..e17ef90d01212 100644
--- a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-CtorDtor.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-CtorDtor.cpp
@@ -336,7 +336,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> -> !cir.ptr<!cir.ptr<!rec_CtorDtor>>
 // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
 // CHECK-NEXT: cir.scope {
 // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
@@ -435,7 +435,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: cir.call @_ZN8CtorDtorD1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_CtorDtor>) -> ()
 // CHECK-NEXT: cir.yield
@@ -511,7 +511,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR3_LOAD:.*]] = cir.load %[[ITR3]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.ptr<!cir.ptr<!rec_CtorDtor>> x 5>>), !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>
+// CHECK-NEXT: %[[TLA_DECAY:.*]] = cir.cast array_to_ptrdecay %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.ptr<!cir.ptr<!rec_CtorDtor>> x 5>> -> !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>
 // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride(%[[TLA_DECAY]] : !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>, %[[ITR3_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!cir.ptr<!rec_CtorDtor>>>
 // CHECK-NEXT: cir.scope {
 // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index
@@ -683,7 +683,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR1_LOAD:.*]] = cir.load %[[ITR1]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[BOUND2_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND2_STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: cir.call @_ZN8CtorDtorD1Ev(%[[STRIDE]]) nothrow : (!cir.ptr<!rec_CtorDtor>) -> ()
 // CHECK-NEXT: cir.yield
@@ -777,7 +777,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: %[[BOUND2_STRIDE_LOAD:.*]] = cir.load %[[TLA_STRIDE]] : !cir.ptr<!cir.ptr<!cir.array<!rec_CtorDtor x 5>>>, !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
 // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[BOUND2_STRIDE_LOAD]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>, %[[ITR1_LOAD]] : !u64i), !cir.ptr<!cir.array<!rec_CtorDtor x 5>>
 // CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<4> : !u64i 
-// CHECK-NEXT: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>>), !cir.ptr<!rec_CtorDtor>
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[STRIDE]] : !cir.ptr<!cir.array<!rec_CtorDtor x 5>> -> !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[ELT:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_CtorDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_CtorDtor>
 // CHECK-NEXT: %[[IDX:.*]] = cir.alloca !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>>, ["__array_idx"] {alignment = 1 : i64}
 // CHECK-NEXT: cir.store %[[ELT]], %[[IDX]] : !cir.ptr<!rec_CtorDtor>, !cir.ptr<!cir.ptr<!rec_CtorDtor>> 
@@ -885,7 +885,7 @@ void do_things(unsigned A, unsigned B) {
 // CHECK-NEXT: cir.condition(%[[COND]])
 // CHECK-NEXT: } body {
 // CHECK-NEXT: %[[ITR2_LOAD:.*]] = cir.load %[[ITR2]] : !cir.ptr<!u64i>, !u64i
-// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>>), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
+// CHECK-NEXT: %[[BOUND3_STRIDE_DECAY:.*]] = cir.cast array_to_ptrdecay %[[BOUND3_STRIDE]] : !cir.ptr<!cir.array<!cir.ptr<!rec_CtorDtor> x 5>> -> !cir.ptr<!cir.ptr<!rec_CtorDtor>>
 // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride(%[[BOUND3_STRIDE_DECAY]] : !cir.ptr<!cir.ptr<!rec_CtorDtor>>, %[[ITR2_LOAD]] : !u64i), !cir.ptr<!cir.ptr<!rec_CtorDtor>>
 // CHECK-NEXT: cir.scope {
 // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
diff --git a/clang/test/CIR/CodeGenOpenACC/serial.c b/clang/test/CIR/CodeGenOpenACC/serial.c
index 9e3359141838f..aae4a92b13b0e 100644
--- a/clang/test/CIR/CodeGenOpenACC/serial.c
+++ b/clang/test/CIR/CodeGenOpenACC/serial.c
@@ -29,7 +29,7 @@ void acc_serial(int cond) {
   // CHECK-NEXT: cir.scope {
   // CHECK-NEXT: cir.while {
   // CHECK-NEXT: %[[INT:.*]] = cir.const #cir.int<1>
-  // CHECK-NEXT: %[[CAST:.*]] = cir.cast(int_to_bool, %[[INT]] :
+  // CHECK-NEXT: %[[CAST:.*]] = cir.cast int_to_bool %[[INT]]
   // CHECK-NEXT: cir.condition(%[[CAST]])
   // CHECK-NEXT: } do {
   // CHECK-NEXT: cir.yield
@@ -49,7 +49,7 @@ void acc_serial(int cond) {
 #pragma acc serial self(cond)
   {}
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.serial self(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.yield
@@ -58,7 +58,7 @@ void acc_serial(int cond) {
 #pragma acc serial self(0)
   {}
   // CHECK-NEXT: %[[ZERO_LITERAL:.*]] = cir.const #cir.int<0> : !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ZERO_LITERAL]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[ZERO_LITERAL]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.serial self(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.yield
@@ -67,7 +67,7 @@ void acc_serial(int cond) {
 #pragma acc serial if(cond)
   {}
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.serial if(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.yield
@@ -76,7 +76,7 @@ void acc_serial(int cond) {
 #pragma acc serial if(1)
   {}
   // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ONE_LITERAL]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[ONE_LITERAL]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.serial if(%[[CONV_CAST]]) {
   // CHECK-NEXT: acc.yield
diff --git a/clang/test/CIR/CodeGenOpenACC/set.c b/clang/test/CIR/CodeGenOpenACC/set.c
index 0b87f42603776..b8030dfd9d883 100644
--- a/clang/test/CIR/CodeGenOpenACC/set.c
+++ b/clang/test/CIR/CodeGenOpenACC/set.c
@@ -26,7 +26,7 @@ void acc_set(int cond) {
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
   // CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.set device_num(%[[COND_CONV]] : si32) if(%[[BOOL_CONV]])
 
@@ -36,7 +36,7 @@ void acc_set(int cond) {
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
   // CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.set default_async(%[[ONE_CONV]] : si32) device_num(%[[COND_CONV]] : si32) if(%[[BOOL_CONV]]) attributes {device_type = #acc.device_type<radeon>}
 
diff --git a/clang/test/CIR/CodeGenOpenACC/shutdown.c b/clang/test/CIR/CodeGenOpenACC/shutdown.c
index b68ef90e07252..8c27fa6c2d544 100644
--- a/clang/test/CIR/CodeGenOpenACC/shutdown.c
+++ b/clang/test/CIR/CodeGenOpenACC/shutdown.c
@@ -18,13 +18,13 @@ void acc_shutdown(int cond) {
 
 #pragma acc shutdown if(cond)
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.shutdown if(%[[BOOL_CONV]])
 
 #pragma acc shutdown if(1)
   // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
-  // CHECK-NEXT: %[[ONE_TO_BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ONE_LITERAL]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[ONE_TO_BOOL_CAST:.*]] = cir.cast int_to_bool %[[ONE_LITERAL]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[ONE_TO_BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.shutdown if(%[[BOOL_CONV]])
 
@@ -40,7 +40,7 @@ void acc_shutdown(int cond) {
 
 #pragma acc shutdown if(cond) device_num(cond) device_type(*)
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
   // CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
diff --git a/clang/test/CIR/CodeGenOpenACC/wait.c b/clang/test/CIR/CodeGenOpenACC/wait.c
index aeda8b955a6d0..8be8665923c59 100644
--- a/clang/test/CIR/CodeGenOpenACC/wait.c
+++ b/clang/test/CIR/CodeGenOpenACC/wait.c
@@ -10,7 +10,7 @@ void acc_wait(int cond) {
 
 #pragma acc wait if (cond)
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: acc.wait if(%[[CONV_CAST]])
 
@@ -37,7 +37,7 @@ void acc_wait(int cond) {
 
 #pragma acc wait(queues:1) if (cond)
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
   // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
@@ -54,7 +54,7 @@ void acc_wait(int cond) {
 
 #pragma acc wait(devnum:1: 2, 3) if (cond)
   // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!s32i>, !s32i
-  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
+  // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool
   // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
   // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
   // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
diff --git a/clang/test/CIR/IR/alloca.cir b/clang/test/CIR/IR/alloca.cir
index 12f7e6ac6a914..4a13c44292b35 100644
--- a/clang/test/CIR/IR/alloca.cir
+++ b/clang/test/CIR/IR/alloca.cir
@@ -12,7 +12,7 @@ module {
     %2 = cir.load align(8) %0 : !cir.ptr<!u64i>, !u64i
     // Dynamically sized alloca
     %3 = cir.alloca !u8i, !cir.ptr<!u8i>, %2 : !u64i, ["bi_alloca"] {alignment = 16 : i64}
-    %4 = cir.cast(bitcast, %3 : !cir.ptr<!u8i>), !cir.ptr<!void>
+    %4 = cir.cast bitcast %3 : !cir.ptr<!u8i> -> !cir.ptr<!void>
     cir.store %4, %1 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
     %5 = cir.load %1 : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
     cir.return %5 : !cir.ptr<!void>
@@ -24,7 +24,7 @@ module {
   // CHECK:   cir.store %arg0, %0 : !u64i, !cir.ptr<!u64i>
   // CHECK:   %2 = cir.load align(8) %0 : !cir.ptr<!u64i>, !u64i
   // CHECK:   %3 = cir.alloca !u8i, !cir.ptr<!u8i>, %2 : !u64i, ["bi_alloca"] {alignment = 16 : i64}
-  // CHECK:   %4 = cir.cast(bitcast, %3 : !cir.ptr<!u8i>), !cir.ptr<!void>
+  // CHECK:   %4 = cir.cast bitcast %3 : !cir.ptr<!u8i> -> !cir.ptr<!void>
   // CHECK:   cir.store %4, %1 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
   // CHECK:   %5 = cir.load %1 : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
   // CHECK:   cir.return %5 : !cir.ptr<!void>
diff --git a/clang/test/CIR/IR/binassign.cir b/clang/test/CIR/IR/binassign.cir
index a25729635094e..6d2c5c8ab6962 100644
--- a/clang/test/CIR/IR/binassign.cir
+++ b/clang/test/CIR/IR/binassign.cir
@@ -12,7 +12,7 @@ module {
     %4 = cir.const #true
     cir.store %4, %0 : !cir.bool, !cir.ptr<!cir.bool>
     %5 = cir.const #cir.int<65> : !s32i
-    %6 = cir.cast(integral, %5 : !s32i), !s8i
+    %6 = cir.cast integral %5 : !s32i -> !s8i
     cir.store %6, %1 : !s8i, !cir.ptr<!s8i>
     %7 = cir.const #cir.fp<3.140000e+00> : !cir.float
     cir.store %7, %2 : !cir.float, !cir.ptr<!cir.float>
@@ -34,7 +34,7 @@ module {
 // CHECK:     %4 = cir.const #true
 // CHECK:     cir.store %4, %0 : !cir.bool, !cir.ptr<!cir.bool>
 // CHECK:     %5 = cir.const #cir.int<65> : !s32i
-// CHECK:     %6 = cir.cast(integral, %5 : !s32i), !s8i
+// CHECK:     %6 = cir.cast integral %5 : !s32i -> !s8i
 // CHECK:     cir.store %6, %1 : !s8i, !cir.ptr<!s8i>
 // CHECK:     %7 = cir.const #cir.fp<3.140000e+00> : !cir.float
 // CHECK:     cir.store %7, %2 : !cir.float, !cir.ptr<!cir.float>
diff --git a/clang/test/CIR/IR/cast.cir b/clang/test/CIR/IR/cast.cir
index a335887de7ec7..11b1664871ef7 100644
--- a/clang/test/CIR/IR/cast.cir
+++ b/clang/test/CIR/IR/cast.cir
@@ -3,21 +3,21 @@
 
 module  {
   cir.func @yolo(%arg0 : !s32i) {
-    %a = cir.cast (int_to_bool, %arg0 : !s32i), !cir.bool
+    %a = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
 
     %0 = cir.const #cir.int<0> : !s32i
     cir.return
   }
 
   cir.func @bitcast(%p: !cir.ptr<!s32i>) {
-    %0 = cir.cast(bitcast, %p : !cir.ptr<!s32i>), !cir.ptr<f32>
+    %0 = cir.cast bitcast %p : !cir.ptr<!s32i> -> !cir.ptr<f32>
     cir.return
   }
 }
 
 // CHECK: cir.func{{.*}} @yolo(%arg0: !s32i)
-// CHECK: %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool
+// CHECK: %0 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
 // CHECK: %1 = cir.const #cir.int<0> : !s32i
 
 // CHECK: cir.func{{.*}} @bitcast
-// CHECK: %0 = cir.cast(bitcast, %arg0 : !cir.ptr<!s32i>), !cir.ptr<f32>
+// CHECK: %0 = cir.cast bitcast %arg0 : !cir.ptr<!s32i> -> !cir.ptr<f32>
diff --git a/clang/test/CIR/IR/cmp.cir b/clang/test/CIR/IR/cmp.cir
index 818527189af01..fdf538d7eef92 100644
--- a/clang/test/CIR/IR/cmp.cir
+++ b/clang/test/CIR/IR/cmp.cir
@@ -274,39 +274,39 @@ module {
     cir.store %arg0, %0 : !cir.bool, !cir.ptr<!cir.bool>
     cir.store %arg1, %1 : !cir.bool, !cir.ptr<!cir.bool>
     %3 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
-    %4 = cir.cast(bool_to_int, %3 : !cir.bool), !s32i
+    %4 = cir.cast bool_to_int %3 : !cir.bool -> !s32i
     %5 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
-    %6 = cir.cast(bool_to_int, %5 : !cir.bool), !s32i
+    %6 = cir.cast bool_to_int %5 : !cir.bool -> !s32i
     %7 = cir.cmp(gt, %4, %6) : !s32i, !cir.bool
     cir.store %7, %2 : !cir.bool, !cir.ptr<!cir.bool>
     %8 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
-    %9 = cir.cast(bool_to_int, %8 : !cir.bool), !s32i
+    %9 = cir.cast bool_to_int %8 : !cir.bool -> !s32i
     %10 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
-    %11 = cir.cast(bool_to_int, %10 : !cir.bool), !s32i
+    %11 = cir.cast bool_to_int %10 : !cir.bool -> !s32i
     %12 = cir.cmp(lt, %9, %11) : !s32i, !cir.bool
     cir.store %12, %2 : !cir.bool, !cir.ptr<!cir.bool>
     %13 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
-    %14 = cir.cast(bool_to_int, %13 : !cir.bool), !s32i
+    %14 = cir.cast bool_to_int %13 : !cir.bool -> !s32i
     %15 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
-    %16 = cir.cast(bool_to_int, %15 : !cir.bool), !s32i
+    %16 = cir.cast bool_to_int %15 : !cir.bool -> !s32i
     %17 = cir.cmp(ge, %14, %16) : !s32i, !cir.bool
     cir.store %17, %2 : !cir.bool, !cir.ptr<!cir.bool>
     %18 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
-    %19 = cir.cast(bool_to_int, %18 : !cir.bool), !s32i
+    %19 = cir.cast bool_to_int %18 : !cir.bool -> !s32i
     %20 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
-    %21 = cir.cast(bool_to_int, %20 : !cir.bool), !s32i
+    %21 = cir.cast bool_to_int %20 : !cir.bool -> !s32i
     %22 = cir.cmp(le, %19, %21) : !s32i, !cir.bool
     cir.store %22, %2 : !cir.bool, !cir.ptr<!cir.bool>
     %23 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
-    %24 = cir.cast(bool_to_int, %23 : !cir.bool), !s32i
+    %24 = cir.cast bool_to_int %23 : !cir.bool -> !s32i
     %25 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
-    %26 = cir.cast(bool_to_int, %25 : !cir.bool), !s32i
+    %26 = cir.cast bool_to_int %25 : !cir.bool -> !s32i
     %27 = cir.cmp(eq, %24, %26) : !s32i, !cir.bool
     cir.store %27, %2 : !cir.bool, !cir.ptr<!cir.bool>
     %28 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
-    %29 = cir.cast(bool_to_int, %28 : !cir.bool), !s32i
+    %29 = cir.cast bool_to_int %28 : !cir.bool -> !s32i
     %30 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
-    %31 = cir.cast(bool_to_int, %30 : !cir.bool), !s32i
+    %31 = cir.cast bool_to_int %30 : !cir.bool -> !s32i
     %32 = cir.cmp(ne, %29, %31) : !s32i, !cir.bool
     cir.store %32, %2 : !cir.bool, !cir.ptr<!cir.bool>
     cir.return
@@ -319,39 +319,39 @@ module {
   // CHECK-NEXT:   cir.store %arg0, %0 : !cir.bool, !cir.ptr<!cir.bool>
   // CHECK-NEXT:   cir.store %arg1, %1 : !cir.bool, !cir.ptr<!cir.bool>
   // CHECK-NEXT:   %3 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
-  // CHECK-NEXT:   %4 = cir.cast(bool_to_int, %3 : !cir.bool), !s32i
+  // CHECK-NEXT:   %4 = cir.cast bool_to_int %3 : !cir.bool -> !s32i
   // CHECK-NEXT:   %5 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
-  // CHECK-NEXT:   %6 = cir.cast(bool_to_int, %5 : !cir.bool), !s32i
+  // CHECK-NEXT:   %6 = cir.cast bool_to_int %5 : !cir.bool -> !s32i
   // CHECK-NEXT:   %7 = cir.cmp(gt, %4, %6) : !s32i, !cir.bool
   // CHECK-NEXT:   cir.store %7, %2 : !cir.bool, !cir.ptr<!cir.bool>
   // CHECK-NEXT:   %8 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
-  // CHECK-NEXT:   %9 = cir.cast(bool_to_int, %8 : !cir.bool), !s32i
+  // CHECK-NEXT:   %9 = cir.cast bool_to_int %8 : !cir.bool -> !s32i
   // CHECK-NEXT:   %10 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
-  // CHECK-NEXT:   %11 = cir.cast(bool_to_int, %10 : !cir.bool), !s32i
+  // CHECK-NEXT:   %11 = cir.cast bool_to_int %10 : !cir.bool -> !s32i
   // CHECK-NEXT:   %12 = cir.cmp(lt, %9, %11) : !s32i, !cir.bool
   // CHECK-NEXT:   cir.store %12, %2 : !cir.bool, !cir.ptr<!cir.bool>
   // CHECK-NEXT:   %13 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
-  // CHECK-NEXT:   %14 = cir.cast(bool_to_int, %13 : !cir.bool), !s32i
+  // CHECK-NEXT:   %14 = cir.cast bool_to_int %13 : !cir.bool -> !s32i
   // CHECK-NEXT:   %15 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
-  // CHECK-NEXT:   %16 = cir.cast(bool_to_int, %15 : !cir.bool), !s32i
+  // CHECK-NEXT:   %16 = cir.cast bool_to_int %15 : !cir.bool -> !s32i
   // CHECK-NEXT:   %17 = cir.cmp(ge, %14, %16) : !s32i, !cir.bool
   // CHECK-NEXT:   cir.store %17, %2 : !cir.bool, !cir.ptr<!cir.bool>
   // CHECK-NEXT:   %18 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
-  // CHECK-NEXT:   %19 = cir.cast(bool_to_int, %18 : !cir.bool), !s32i
+  // CHECK-NEXT:   %19 = cir.cast bool_to_int %18 : !cir.bool -> !s32i
   // CHECK-NEXT:   %20 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
-  // CHECK-NEXT:   %21 = cir.cast(bool_to_int, %20 : !cir.bool), !s32i
+  // CHECK-NEXT:   %21 = cir.cast bool_to_int %20 : !cir.bool -> !s32i
   // CHECK-NEXT:   %22 = cir.cmp(le, %19, %21) : !s32i, !cir.bool
   // CHECK-NEXT:   cir.store %22, %2 : !cir.bool, !cir.ptr<!cir.bool>
   // CHECK-NEXT:   %23 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
-  // CHECK-NEXT:   %24 = cir.cast(bool_to_int, %23 : !cir.bool), !s32i
+  // CHECK-NEXT:   %24 = cir.cast bool_to_int %23 : !cir.bool -> !s32i
   // CHECK-NEXT:   %25 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
-  // CHECK-NEXT:   %26 = cir.cast(bool_to_int, %25 : !cir.bool), !s32i
+  // CHECK-NEXT:   %26 = cir.cast bool_to_int %25 : !cir.bool -> !s32i
   // CHECK-NEXT:   %27 = cir.cmp(eq, %24, %26) : !s32i, !cir.bool
   // CHECK-NEXT:   cir.store %27, %2 : !cir.bool, !cir.ptr<!cir.bool>
   // CHECK-NEXT:   %28 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
-  // CHECK-NEXT:   %29 = cir.cast(bool_to_int, %28 : !cir.bool), !s32i
+  // CHECK-NEXT:   %29 = cir.cast bool_to_int %28 : !cir.bool -> !s32i
   // CHECK-NEXT:   %30 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
-  // CHECK-NEXT:   %31 = cir.cast(bool_to_int, %30 : !cir.bool), !s32i
+  // CHECK-NEXT:   %31 = cir.cast bool_to_int %30 : !cir.bool -> !s32i
   // CHECK-NEXT:   %32 = cir.cmp(ne, %29, %31) : !s32i, !cir.bool
   // CHECK-NEXT:   cir.store %32, %2 : !cir.bool, !cir.ptr<!cir.bool>
   // CHECK-NEXT:   cir.return
diff --git a/clang/test/CIR/IR/vtable-addrpt.cir b/clang/test/CIR/IR/vtable-addrpt.cir
index 0b809cc2506e6..106e7485fbbcf 100644
--- a/clang/test/CIR/IR/vtable-addrpt.cir
+++ b/clang/test/CIR/IR/vtable-addrpt.cir
@@ -14,7 +14,7 @@ module {
     cir.store %arg0, %0 : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
     %1 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
     %2 = cir.vtable.address_point(@_ZTV1S, address_point = <index = 0, offset = 2>) : !cir.vptr
-    %3 = cir.cast(bitcast, %1 : !cir.ptr<!rec_S>), !cir.ptr<!cir.vptr>
+    %3 = cir.cast bitcast %1 : !cir.ptr<!rec_S> -> !cir.ptr<!cir.vptr>
     cir.store align(8) %2, %3 : !cir.vptr, !cir.ptr<!cir.vptr>
     cir.return
   }
diff --git a/clang/test/CIR/IR/vtt-addrpoint.cir b/clang/test/CIR/IR/vtt-addrpoint.cir
index f05bb782c6911..11e5f4da83b50 100644
--- a/clang/test/CIR/IR/vtt-addrpoint.cir
+++ b/clang/test/CIR/IR/vtt-addrpoint.cir
@@ -26,7 +26,7 @@ module {
 
     cir.call @_ZN1BC2Ev(%4, %5) : (!cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!void>>) -> ()
     %6 = cir.vtt.address_point %3 : !cir.ptr<!cir.ptr<!void>>, offset = 0 -> !cir.ptr<!cir.ptr<!void>>
-    %7 = cir.cast(bitcast, %6 : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!cir.vptr>
+    %7 = cir.cast bitcast %6 : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
     %8 = cir.load align(8) %7 : !cir.ptr<!cir.vptr>, !cir.vptr
     %9 = cir.vtable.get_vptr %2 : !cir.ptr<!rec_C> -> !cir.ptr<!cir.vptr>
     cir.store align(8) %8, %9 : !cir.vptr, !cir.ptr<!cir.vptr>
diff --git a/clang/test/CIR/Lowering/cast.cir b/clang/test/CIR/Lowering/cast.cir
index 6842905dae6a4..ec104edec2405 100644
--- a/clang/test/CIR/Lowering/cast.cir
+++ b/clang/test/CIR/Lowering/cast.cir
@@ -26,51 +26,51 @@ module {
 
     // Integer casts.
     %9 = cir.load %0 : !cir.ptr<!u32i>, !u32i
-    %10 = cir.cast(integral, %9 : !u32i), !s8i
+    %10 = cir.cast integral %9 : !u32i -> !s8i
     // CHECK: %{{[0-9]+}} = llvm.trunc %{{[0-9]+}} : i32 to i8
     cir.store %10, %3 : !s8i, !cir.ptr<!s8i>
     %11 = cir.load %1 : !cir.ptr<!s32i>, !s32i
-    %12 = cir.cast(integral, %11 : !s32i), !s16i
+    %12 = cir.cast integral %11 : !s32i -> !s16i
     // CHECK: %{{[0-9]+}} = llvm.trunc %{{[0-9]+}} : i32 to i16
     cir.store %12, %4 : !s16i, !cir.ptr<!s16i>
     %13 = cir.load %0 : !cir.ptr<!u32i>, !u32i
-    %14 = cir.cast(integral, %13 : !u32i), !s64i
+    %14 = cir.cast integral %13 : !u32i -> !s64i
     // CHECK: %{{[0-9]+}} = llvm.zext %{{[0-9]+}} : i32 to i64
     cir.store %14, %5 : !s64i, !cir.ptr<!s64i>
     %15 = cir.load %1 : !cir.ptr<!s32i>, !s32i
-    %16 = cir.cast(integral, %15 : !s32i), !s64i
+    %16 = cir.cast integral %15 : !s32i -> !s64i
     // CHECK: %{{[0-9]+}} = llvm.sext %{{[0-9]+}} : i32 to i64
-    %30 = cir.cast(integral, %arg1 : !s32i), !u32i
+    %30 = cir.cast integral %arg1 : !s32i -> !u32i
     // Should not produce a cast.
-    %32 = cir.cast(integral, %arg0 : !u32i), !s32i
+    %32 = cir.cast integral %arg0 : !u32i -> !s32i
     // Should not produce a cast.
     %21 = cir.load %20 : !cir.ptr<!s16i>, !s16i
-    %22 = cir.cast(integral, %21 : !s16i), !u64i
+    %22 = cir.cast integral %21 : !s16i -> !u64i
     // CHECK: %[[TMP:[0-9]+]] = llvm.sext %{{[0-9]+}} : i16 to i64
-    %33 = cir.cast(int_to_bool, %arg1 : !s32i), !cir.bool
+    %33 = cir.cast int_to_bool %arg1 : !s32i -> !cir.bool
     // CHECK: %[[#ZERO:]] = llvm.mlir.constant(0 : i32) : i32
     // CHECK: %[[#CMP:]] = llvm.icmp "ne" %arg1, %[[#ZERO]] : i32
 
     // Pointer casts.
     cir.store %16, %6 : !s64i, !cir.ptr<!s64i>
-    %23 = cir.cast(int_to_ptr, %22 : !u64i), !cir.ptr<!u8i>
+    %23 = cir.cast int_to_ptr %22 : !u64i -> !cir.ptr<!u8i>
     // CHECK: %[[TMP2:[0-9]+]] = llvm.inttoptr %[[TMP]] : i64 to !llvm.ptr
-    %24 = cir.cast(ptr_to_int, %23 : !cir.ptr<!u8i>), !s32i
+    %24 = cir.cast ptr_to_int %23 : !cir.ptr<!u8i> -> !s32i
     // CHECK: %{{[0-9]+}} = llvm.ptrtoint %[[TMP2]] : !llvm.ptr to i32
-    %29 = cir.cast(ptr_to_bool, %23 : !cir.ptr<!u8i>), !cir.bool
+    %29 = cir.cast ptr_to_bool %23 : !cir.ptr<!u8i> -> !cir.bool
 
     // Floating point casts.
-    %25 = cir.cast(int_to_float, %arg1 : !s32i), !cir.float
+    %25 = cir.cast int_to_float %arg1 : !s32i -> !cir.float
     // CHECK: %{{.+}} = llvm.sitofp %{{.+}} : i32 to f32
-    %26 = cir.cast(int_to_float, %arg0 : !u32i), !cir.float
+    %26 = cir.cast int_to_float %arg0 : !u32i -> !cir.float
     // CHECK: %{{.+}} = llvm.uitofp %{{.+}} : i32 to f32
-    %27 = cir.cast(float_to_int, %arg2 : !cir.float), !s32i
+    %27 = cir.cast float_to_int %arg2 : !cir.float -> !s32i
     // CHECK: %{{.+}} = llvm.fptosi %{{.+}} : f32 to i32
-    %28 = cir.cast(float_to_int, %arg2 : !cir.float), !u32i
+    %28 = cir.cast float_to_int %arg2 : !cir.float -> !u32i
     // CHECK: %{{.+}} = llvm.fptoui %{{.+}} : f32 to i32
     %18 = cir.const #cir.int<0> : !s32i
     // CHECK: %{{.+}} = llvm.fptrunc %{{.+}} : f64 to f32
-    %34 = cir.cast(floating, %arg3 : !cir.double), !cir.float
+    %34 = cir.cast floating %arg3 : !cir.double -> !cir.float
 
     cir.store %18, %2 : !s32i, !cir.ptr<!s32i>
     %19 = cir.load %2 : !cir.ptr<!s32i>, !s32i
@@ -84,7 +84,7 @@ module {
     cir.store %arg0, %0 : !cir.bool, !cir.ptr<!cir.bool>
 
     %2 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
-    %3 = cir.cast(bool_to_int, %2 : !cir.bool), !u8i
+    %3 = cir.cast bool_to_int %2 : !cir.bool -> !u8i
     // CHECK: %[[LOAD_BOOL:.*]] = llvm.load %{{.*}} : !llvm.ptr -> i8
     // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[LOAD_BOOL]] : i8 to i1
     // CHECK: %[[EXT:.*]] = llvm.zext %[[TRUNC]] : i1 to i8
diff --git a/clang/test/CIR/Lowering/if.cir b/clang/test/CIR/Lowering/if.cir
index 3a077aa9ef057..888fb38e2d77c 100644
--- a/clang/test/CIR/Lowering/if.cir
+++ b/clang/test/CIR/Lowering/if.cir
@@ -4,7 +4,7 @@
 
 module {
   cir.func @foo(%arg0: !s32i) -> !s32i {
-    %4 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool
+    %4 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
     cir.if %4 {
       %5 = cir.const #cir.int<1> : !s32i
       cir.return %5 : !s32i
@@ -44,7 +44,7 @@ module {
 //  LLVM-NEXT: }
 
   cir.func @onlyIf(%arg0: !s32i) -> !s32i {
-    %4 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool
+    %4 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
     cir.if %4 {
       %5 = cir.const #cir.int<1> : !s32i
       cir.return %5 : !s32i
@@ -66,7 +66,7 @@ module {
   // Verify empty if clause is properly lowered to empty block
   cir.func @emptyIfClause(%arg0: !s32i) -> !s32i {
     // MLIR-LABEL: llvm.func @emptyIfClause
-    %4 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool
+    %4 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
     // MLIR: llvm.cond_br {{%.*}}, ^[[T:.*]], ^[[PHI:.*]]
     cir.if %4 {
       // MLIR-NEXT: ^[[T]]:
@@ -82,7 +82,7 @@ module {
   // addressed
   cir.func @emptyIfElseClause(%arg0: !s32i) -> !s32i {
     // MLIR-LABEL: llvm.func @emptyIfElseClause
-    %4 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool
+    %4 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
     // MLIR: llvm.cond_br {{%.*}}, ^[[T:.*]], ^[[F:.*]]
     cir.if %4 {
     // MLIR-NEXT: ^[[T]]:
diff --git a/clang/test/CIR/Lowering/vtt-addrpoint.cir b/clang/test/CIR/Lowering/vtt-addrpoint.cir
index 96dc27d991cd4..e1bfd00245b1b 100644
--- a/clang/test/CIR/Lowering/vtt-addrpoint.cir
+++ b/clang/test/CIR/Lowering/vtt-addrpoint.cir
@@ -24,7 +24,7 @@ module {
     %5 = cir.vtt.address_point %3 : !cir.ptr<!cir.ptr<!void>>, offset = 1 -> !cir.ptr<!cir.ptr<!void>>
     cir.call @_ZN1BC2Ev(%4, %5) : (!cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!void>>) -> ()
     %6 = cir.vtt.address_point %3 : !cir.ptr<!cir.ptr<!void>>, offset = 0 -> !cir.ptr<!cir.ptr<!void>>
-    %7 = cir.cast(bitcast, %6 : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!cir.vptr>
+    %7 = cir.cast bitcast %6 : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
     %8 = cir.load align(8) %7 : !cir.ptr<!cir.vptr>, !cir.vptr
     %9 = cir.vtable.get_vptr %2 : !cir.ptr<!rec_C> -> !cir.ptr<!cir.vptr>
     cir.store align(8) %8, %9 : !cir.vptr, !cir.ptr<!cir.vptr>
diff --git a/clang/test/CIR/Transforms/canonicalize.cir b/clang/test/CIR/Transforms/canonicalize.cir
index 5daff119a626f..5606f9e16a690 100644
--- a/clang/test/CIR/Transforms/canonicalize.cir
+++ b/clang/test/CIR/Transforms/canonicalize.cir
@@ -50,39 +50,39 @@ module {
   // CHECK-NEXT: }
 
   cir.func @cast1(%arg0: !cir.bool) -> !cir.bool {
-    %0 = cir.cast(bool_to_int, %arg0 : !cir.bool), !s32i
-    %1 = cir.cast(int_to_bool, %0 : !s32i), !cir.bool
+    %0 = cir.cast bool_to_int %arg0 : !cir.bool -> !s32i
+    %1 = cir.cast int_to_bool %0 : !s32i -> !cir.bool
     cir.return %1 : !cir.bool
   }
   // CHECK:      cir.func{{.*}} @cast1(%[[ARG0:.*]]: !cir.bool) -> !cir.bool
   // CHECK-NEXT:   cir.return %[[ARG0]] : !cir.bool
 
   cir.func @cast2(%arg0: !s32i) -> !cir.bool {
-    %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool
-    %1 = cir.cast(bool_to_int, %0 : !cir.bool), !s32i
-    %2 = cir.cast(integral, %1 : !s32i), !s64i
-    %3 = cir.cast(int_to_bool, %2 : !s64i), !cir.bool
+    %0 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+    %1 = cir.cast bool_to_int %0 : !cir.bool -> !s32i
+    %2 = cir.cast integral %1 : !s32i -> !s64i
+    %3 = cir.cast int_to_bool %2 : !s64i -> !cir.bool
     cir.return %3 : !cir.bool
   }
   // CHECK:      cir.func{{.*}} @cast2(%[[ARG0:.*]]: !s32i) -> !cir.bool
-  // CHECK-NEXT:   %[[CAST:.*]] = cir.cast(int_to_bool, %[[ARG0]] : !s32i), !cir.bool
+  // CHECK-NEXT:   %[[CAST:.*]] = cir.cast int_to_bool %[[ARG0]] : !s32i -> !cir.bool
   // CHECK-NEXT:   cir.return %[[CAST]] : !cir.bool
 
   cir.func @no_fold_cast(%arg0: !s32i) -> !s64i {
-    %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool
-    %1 = cir.cast(bool_to_int, %0 : !cir.bool), !s32i
-    %2 = cir.cast(integral, %1 : !s32i), !s64i
+    %0 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+    %1 = cir.cast bool_to_int %0 : !cir.bool -> !s32i
+    %2 = cir.cast integral %1 : !s32i -> !s64i
     cir.return %2 : !s64i
   }
   // CHECK:      cir.func{{.*}} @no_fold_cast(%[[ARG0:.*]]: !s32i) -> !s64i
-  // CHECK-NEXT:   %[[CAST:.*]] = cir.cast(int_to_bool, %[[ARG0]] : !s32i), !cir.bool
-  // CHECK-NEXT:   %[[CAST2:.*]] = cir.cast(bool_to_int, %[[CAST]] : !cir.bool), !s32i
-  // CHECK-NEXT:   %[[CAST3:.*]] = cir.cast(integral, %[[CAST2]] : !s32i), !s64i
+  // CHECK-NEXT:   %[[CAST:.*]] = cir.cast int_to_bool %[[ARG0]] : !s32i -> !cir.bool
+  // CHECK-NEXT:   %[[CAST2:.*]] = cir.cast bool_to_int %[[CAST]] : !cir.bool -> !s32i
+  // CHECK-NEXT:   %[[CAST3:.*]] = cir.cast integral %[[CAST2]] : !s32i -> !s64i
   // CHECK-NEXT:   cir.return %[[CAST3]] : !s64i
 
   cir.func @cast_poison() -> !s64i {
     %0 = cir.const #cir.poison : !s32i
-    %1 = cir.cast(integral, %0 : !s32i), !s64i
+    %1 = cir.cast integral %0 : !s32i -> !s64i
     cir.return %1 : !s64i
   }
   // CHECK:      @cast_poison
diff --git a/clang/test/CIR/Transforms/if.cir b/clang/test/CIR/Transforms/if.cir
index 3f817c793643f..ced288f7ecf29 100644
--- a/clang/test/CIR/Transforms/if.cir
+++ b/clang/test/CIR/Transforms/if.cir
@@ -4,7 +4,7 @@
 
 module {
   cir.func @foo(%arg0: !s32i) -> !s32i {
-    %4 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool
+    %4 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
     cir.if %4 {
       %5 = cir.const #cir.int<1> : !s32i
       cir.return %5 : !s32i
@@ -15,7 +15,7 @@ module {
     cir.return %arg0 : !s32i
   }
 //      CHECK: cir.func{{.*}} @foo(%arg0: !s32i) -> !s32i {
-// CHECK-NEXT:   %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool
+// CHECK-NEXT:   %0 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
 // CHECK-NEXT:   cir.brcond %0 ^bb1, ^bb2
 // CHECK-NEXT: ^bb1:  // pred: ^bb0
 // CHECK-NEXT:   %1 = cir.const #cir.int<1> : !s32i
@@ -28,7 +28,7 @@ module {
 // CHECK-NEXT: }
 
   cir.func @onlyIf(%arg0: !s32i) -> !s32i {
-    %4 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool
+    %4 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
     cir.if %4 {
       %5 = cir.const #cir.int<1> : !s32i
       cir.return %5 : !s32i
@@ -36,7 +36,7 @@ module {
     cir.return %arg0 : !s32i
   }
 //      CHECK: cir.func{{.*}} @onlyIf(%arg0: !s32i) -> !s32i {
-// CHECK-NEXT:   %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool
+// CHECK-NEXT:   %0 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
 // CHECK-NEXT:   cir.brcond %0 ^bb1, ^bb2
 // CHECK-NEXT: ^bb1:  // pred: ^bb0
 // CHECK-NEXT:   %1 = cir.const #cir.int<1> : !s32i
diff --git a/clang/test/CIR/Transforms/switch.cir b/clang/test/CIR/Transforms/switch.cir
index a000d6b70fbcc..3addfe37061cd 100644
--- a/clang/test/CIR/Transforms/switch.cir
+++ b/clang/test/CIR/Transforms/switch.cir
@@ -261,8 +261,8 @@ module {
 // CHECK-NEXT:     %[[RANGE:[0-9]+]] = cir.const #cir.int<99>
 // CHECK-NEXT:     %[[LOWER_BOUND:[0-9]+]] = cir.const #cir.int<1>
 // CHECK-NEXT:     %[[DIFF:[0-9]+]] = cir.binop(sub, %[[X]], %[[LOWER_BOUND]])
-// CHECK-NEXT:     %[[U_DIFF:[0-9]+]] = cir.cast(integral, %[[DIFF]] : !s32i), !u32i
-// CHECK-NEXT:     %[[U_RANGE:[0-9]+]] = cir.cast(integral, %[[RANGE]] : !s32i), !u32i
+// CHECK-NEXT:     %[[U_DIFF:[0-9]+]] = cir.cast integral %[[DIFF]] : !s32i -> !u32i
+// CHECK-NEXT:     %[[U_RANGE:[0-9]+]] = cir.cast integral %[[RANGE]] : !s32i -> !u32i
 // CHECK-NEXT:     %[[CMP_RESULT:[0-9]+]] = cir.cmp(le, %[[U_DIFF]], %[[U_RANGE]])
 // CHECK-NEXT:     cir.brcond %[[CMP_RESULT]] ^[[CASE_RANGE]], ^[[CASE_DEFAULT:bb[0-9]+]]
 // CHECK-NEXT:  ^[[CASE_DEFAULT]]:
@@ -304,8 +304,8 @@ module {
 // CHECK:    %[[CONST97:.*]] = cir.const #cir.int<97> : !s32i
 // CHECK:    %[[CONST3:.*]] = cir.const #cir.int<3> : !s32i
 // CHECK:    %[[SUB:.*]] = cir.binop(sub, %[[COND]], %[[CONST3]]) : !s32i
-// CHECK:    %[[CAST1:.*]] = cir.cast(integral, %[[SUB]] : !s32i), !u32i
-// CHECK:    %[[CAST2:.*]] = cir.cast(integral, %[[CONST97]] : !s32i), !u32i
+// CHECK:    %[[CAST1:.*]] = cir.cast integral %[[SUB]] : !s32i -> !u32i
+// CHECK:    %[[CAST2:.*]] = cir.cast integral %[[CONST97]] : !s32i -> !u32i
 // CHECK:    %[[CMP:.*]] = cir.cmp(le, %[[CAST1]], %[[CAST2]]) : !u32i, !cir.bool
 // CHECK:    cir.brcond %7 ^bb[[#DEFAULT_BB]], ^bb[[#RANGE_BB:]]
 // CHECK:  ^bb[[#RANGE_BB]]:  // pred: ^bb[[#RANGE_BR]]

>From bafc40a67c1403da27f955f8e9cc72a0d5834e2d Mon Sep 17 00:00:00 2001
From: Hongyu Chen <xxs_chy at outlook.com>
Date: Wed, 1 Oct 2025 16:47:41 +0800
Subject: [PATCH 27/48] [GlobalOpt] Check if users are CallBase when changing
 CC (#161399)

Fixes https://github.com/llvm/llvm-project/issues/156656
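`hasChangeableCCImpl` guarantees the address of the function is not
taken, but it ignores assume-like calls, so such calls can still appear
among the function's users. This patch skips every user that is not a
direct call to the function (e.g. assume-like calls) when changing CC.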
---
 llvm/lib/Transforms/IPO/GlobalOpt.cpp    | 16 +++--
 llvm/test/Transforms/GlobalOpt/fastcc.ll | 90 +++++++++++++++++++-----
 2 files changed, 85 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index f88d51f443bcf..99c4982c58b47 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1680,7 +1680,9 @@ processGlobal(GlobalValue &GV,
 /// FastCC.
 static void ChangeCalleesToFastCall(Function *F) {
   for (User *U : F->users())
-    cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
+    if (auto *Call = dyn_cast<CallBase>(U))
+      if (Call->getCalledOperand() == F)
+        Call->setCallingConv(CallingConv::Fast);
 }
 
 static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
@@ -1766,10 +1768,12 @@ isValidCandidateForColdCC(Function &F,
     return false;
 
   for (User *U : F.users()) {
-    CallBase &CB = cast<CallBase>(*U);
-    Function *CallerFunc = CB.getParent()->getParent();
+    CallBase *CB = dyn_cast<CallBase>(U);
+    if (!CB || CB->getCalledOperand() != &F)
+      continue;
+    Function *CallerFunc = CB->getParent()->getParent();
     BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
-    if (!isColdCallSite(CB, CallerBFI))
+    if (!isColdCallSite(*CB, CallerBFI))
       return false;
     if (!llvm::is_contained(AllCallsCold, CallerFunc))
       return false;
@@ -1779,7 +1783,9 @@ isValidCandidateForColdCC(Function &F,
 
 static void changeCallSitesToColdCC(Function *F) {
   for (User *U : F->users())
-    cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
+    if (auto *Call = dyn_cast<CallBase>(U))
+      if (Call->getCalledOperand() == F)
+        Call->setCallingConv(CallingConv::Cold);
 }
 
 // This function iterates over all the call instructions in the input Function
diff --git a/llvm/test/Transforms/GlobalOpt/fastcc.ll b/llvm/test/Transforms/GlobalOpt/fastcc.ll
index 854357e6fad97..edbd602a97f3b 100644
--- a/llvm/test/Transforms/GlobalOpt/fastcc.ll
+++ b/llvm/test/Transforms/GlobalOpt/fastcc.ll
@@ -1,16 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=globalopt -S | FileCheck %s
 
 declare token @llvm.call.preallocated.setup(i32)
 declare ptr @llvm.call.preallocated.arg(token, i32)
 
 define internal i32 @f(ptr %m) {
-; CHECK-LABEL: define internal fastcc i32 @f
+; CHECK-LABEL: define internal fastcc i32 @f(
+; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr {
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[M]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
   %v = load i32, ptr %m
   ret i32 %v
 }
 
 define internal x86_thiscallcc i32 @g(ptr %m) {
-; CHECK-LABEL: define internal fastcc i32 @g
+; CHECK-LABEL: define internal fastcc i32 @g(
+; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr {
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[M]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
   %v = load i32, ptr %m
   ret i32 %v
 }
@@ -18,41 +27,80 @@ define internal x86_thiscallcc i32 @g(ptr %m) {
 ; Leave this one alone, because the user went out of their way to request this
 ; convention.
 define internal coldcc i32 @h(ptr %m) {
-; CHECK-LABEL: define internal coldcc i32 @h
+; CHECK-LABEL: define internal coldcc i32 @h(
+; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr {
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[M]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
   %v = load i32, ptr %m
   ret i32 %v
 }
 
 define internal i32 @j(ptr %m) {
-; CHECK-LABEL: define internal i32 @j
+; CHECK-LABEL: define internal i32 @j(
+; CHECK-SAME: ptr [[M:%.*]]) {
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[M]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
   %v = load i32, ptr %m
   ret i32 %v
 }
 
 define internal i32 @inalloca(ptr inalloca(i32) %p) {
-; CHECK-LABEL: define internal fastcc i32 @inalloca(ptr %p)
+; CHECK-LABEL: define internal fastcc i32 @inalloca(
+; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr {
+; CHECK-NEXT:    [[RV:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    ret i32 [[RV]]
+;
   %rv = load i32, ptr %p
   ret i32 %rv
 }
 
 define i32 @inalloca2_caller(ptr inalloca(i32) %p) {
+; CHECK-LABEL: define i32 @inalloca2_caller(
+; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[RV:%.*]] = musttail call i32 @inalloca2(ptr inalloca(i32) [[P]])
+; CHECK-NEXT:    ret i32 [[RV]]
+;
   %rv = musttail call i32 @inalloca2(ptr inalloca(i32) %p)
   ret i32 %rv
 }
 define internal i32 @inalloca2(ptr inalloca(i32) %p) {
 ; Because of the musttail caller, this inalloca cannot be dropped.
-; CHECK-LABEL: define internal i32 @inalloca2(ptr inalloca(i32) %p)
+; CHECK-LABEL: define internal i32 @inalloca2(
+; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) unnamed_addr {
+; CHECK-NEXT:    [[RV:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    ret i32 [[RV]]
+;
   %rv = load i32, ptr %p
   ret i32 %rv
 }
 
 define internal i32 @preallocated(ptr preallocated(i32) %p) {
-; CHECK-LABEL: define internal fastcc i32 @preallocated(ptr %p)
+; CHECK-LABEL: define internal fastcc i32 @preallocated(
+; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr {
+; CHECK-NEXT:    [[RV:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    ret i32 [[RV]]
+;
   %rv = load i32, ptr %p
   ret i32 %rv
 }
 
 define void @call_things() {
+; CHECK-LABEL: define void @call_things() local_unnamed_addr {
+; CHECK-NEXT:    [[M:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call fastcc i32 @f(ptr [[M]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call fastcc i32 @g(ptr [[M]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call coldcc i32 @h(ptr [[M]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @j(ptr [[M]])
+; CHECK-NEXT:    [[ARGS:%.*]] = alloca inalloca i32, align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = call fastcc i32 @inalloca(ptr [[ARGS]])
+; CHECK-NEXT:    [[TMP6:%.*]] = call ptr @llvm.stacksave.p0()
+; CHECK-NEXT:    [[PAARG:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = call fastcc i32 @preallocated(ptr [[PAARG]])
+; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[TMP6]])
+; CHECK-NEXT:    ret void
+;
   %m = alloca i32
   call i32 @f(ptr %m)
   call x86_thiscallcc i32 @g(ptr %m)
@@ -65,15 +113,25 @@ define void @call_things() {
   call i32 @preallocated(ptr preallocated(i32) %N) ["preallocated"(token %c)]
   ret void
 }
-; CHECK-LABEL: define void @call_things()
-; CHECK: call fastcc i32 @f
-; CHECK: call fastcc i32 @g
-; CHECK: call coldcc i32 @h
-; CHECK: call i32 @j
-; CHECK: call fastcc i32 @inalloca(ptr %args)
-; CHECK-NOT: llvm.call.preallocated
-; CHECK: call fastcc i32 @preallocated(ptr %paarg)
 
 @llvm.used = appending global [1 x ptr] [
-   ptr @j
+  ptr @j
 ], section "llvm.metadata"
+
+define internal i32 @assume_fastcc() {
+; CHECK-LABEL: define internal fastcc i32 @assume_fastcc() {
+; CHECK-NEXT:    [[OBJSIZE:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr @assume_fastcc, i1 false, i1 false, i1 false)
+; CHECK-NEXT:    ret i32 [[OBJSIZE]]
+;
+  %objsize = call i32 @llvm.objectsize.i32.p0(ptr @assume_fastcc, i1 false, i1 false, i1 false)
+  ret i32 %objsize
+}
+
+define internal i32 @constexpr_self_user() addrspace(1) {
+; CHECK-LABEL: define internal fastcc i32 @constexpr_self_user() addrspace(1) {
+; CHECK-NEXT:    [[OBJSIZE:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false)
+; CHECK-NEXT:    ret i32 [[OBJSIZE]]
+;
+  %objsize = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false)
+  ret i32 %objsize
+}

>From ffa37472f6b82a4aab90c7b5bab489d4129060df Mon Sep 17 00:00:00 2001
From: Pierre van Houtryve <pierre.vanhoutryve at amd.com>
Date: Wed, 1 Oct 2025 10:51:00 +0200
Subject: [PATCH 28/48] [AMDGPU][InsertWaitCnts] Refactor some helper
 functions, NFC (#161160)

- Remove one-line wrappers around a simple function call when they're
only used once or twice.
- Move very generic helpers into SIInstrInfo
- Delete unused functions

The goal is simply to reduce the noise in SIInsertWaitCnts without
hiding functionality. I focused on moving trivial helpers, or helpers
with very descriptive/verbose names (so it doesn't hide too much logic
away from the pass), and that have some reusability potential.

I'm also trying to make the code style more consistent. It doesn't make
sense to see a function call `TII->isXXX` then suddenly call a random
`isY` method that just wraps around `TII->isY`.

The context of this work is that I'm trying to learn how this pass
works, and while going through the code I noticed some little things
here and there that I thought would be good to fix.
---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 115 +++-----------------
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp      |  53 +++++++++
 llvm/lib/Target/AMDGPU/SIInstrInfo.h        |  23 ++++
 3 files changed, 89 insertions(+), 102 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index f291191dbfd5c..91136fd85c545 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -495,13 +495,6 @@ class SIInsertWaitcnts {
   bool isVMEMOrFlatVMEM(const MachineInstr &MI) const;
   bool run(MachineFunction &MF);
 
-  bool isForceEmitWaitcnt() const {
-    for (auto T : inst_counter_types())
-      if (ForceEmitWaitcnt[T])
-        return true;
-    return false;
-  }
-
   void setForceEmitWaitcnt() {
 // For non-debug builds, ForceEmitWaitcnt has been initialized to false;
 // For debug builds, get the debug counter info and adjust if need be
@@ -570,10 +563,6 @@ class SIInsertWaitcnts {
     return VmemReadMapping[getVmemType(Inst)];
   }
 
-  bool hasXcnt() const { return ST->hasWaitXCnt(); }
-
-  bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const;
-  bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
   bool isVmemAccess(const MachineInstr &MI) const;
   bool generateWaitcntInstBefore(MachineInstr &MI,
                                  WaitcntBrackets &ScoreBrackets,
@@ -591,7 +580,6 @@ class SIInsertWaitcnts {
                              WaitcntBrackets &ScoreBrackets);
   bool insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block,
                             WaitcntBrackets &ScoreBrackets);
-  static bool asynchronouslyWritesSCC(unsigned Opcode);
 };
 
 // This objects maintains the current score brackets of each wait counter, and
@@ -1109,7 +1097,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
         setRegScore(FIRST_LDS_VGPR, T, CurrScore);
     }
 
-    if (Context->asynchronouslyWritesSCC(Inst.getOpcode())) {
+    if (SIInstrInfo::isSBarrierSCCWrite(Inst.getOpcode())) {
       setRegScore(SCC, T, CurrScore);
       PendingSCCWrite = &Inst;
     }
@@ -1831,12 +1819,6 @@ bool WaitcntGeneratorGFX12Plus::createNewWaitcnt(
   return Modified;
 }
 
-static bool readsVCCZ(const MachineInstr &MI) {
-  unsigned Opc = MI.getOpcode();
-  return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) &&
-         !MI.getOperand(1).isUndef();
-}
-
 /// \returns true if the callee inserts an s_waitcnt 0 on function entry.
 static bool callWaitsOnFunctionEntry(const MachineInstr &MI) {
   // Currently all conventions wait, but this may not always be the case.
@@ -2061,7 +2043,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
           ScoreBrackets.determineWait(SmemAccessCounter, Interval, Wait);
         }
 
-        if (hasXcnt() && Op.isDef())
+        if (ST->hasWaitXCnt() && Op.isDef())
           ScoreBrackets.determineWait(X_CNT, Interval, Wait);
       }
     }
@@ -2087,10 +2069,9 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   // TODO: Remove this work-around, enable the assert for Bug 457939
   //       after fixing the scheduler. Also, the Shader Compiler code is
   //       independent of target.
-  if (readsVCCZ(MI) && ST->hasReadVCCZBug()) {
-    if (ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
-      Wait.DsCnt = 0;
-    }
+  if (SIInstrInfo::isCBranchVCCZRead(MI) && ST->hasReadVCCZBug() &&
+      ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
+    Wait.DsCnt = 0;
   }
 
   // Verify that the wait is actually needed.
@@ -2185,75 +2166,11 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
   return Modified;
 }
 
-// This is a flat memory operation. Check to see if it has memory tokens other
-// than LDS. Other address spaces supported by flat memory operations involve
-// global memory.
-bool SIInsertWaitcnts::mayAccessVMEMThroughFlat(const MachineInstr &MI) const {
-  assert(TII->isFLAT(MI));
-
-  // All flat instructions use the VMEM counter except prefetch.
-  if (!TII->usesVM_CNT(MI))
-    return false;
-
-  // If there are no memory operands then conservatively assume the flat
-  // operation may access VMEM.
-  if (MI.memoperands_empty())
-    return true;
-
-  // See if any memory operand specifies an address space that involves VMEM.
-  // Flat operations only supported FLAT, LOCAL (LDS), or address spaces
-  // involving VMEM such as GLOBAL, CONSTANT, PRIVATE (SCRATCH), etc. The REGION
-  // (GDS) address space is not supported by flat operations. Therefore, simply
-  // return true unless only the LDS address space is found.
-  for (const MachineMemOperand *Memop : MI.memoperands()) {
-    unsigned AS = Memop->getAddrSpace();
-    assert(AS != AMDGPUAS::REGION_ADDRESS);
-    if (AS != AMDGPUAS::LOCAL_ADDRESS)
-      return true;
-  }
-
-  return false;
-}
-
-// This is a flat memory operation. Check to see if it has memory tokens for
-// either LDS or FLAT.
-bool SIInsertWaitcnts::mayAccessLDSThroughFlat(const MachineInstr &MI) const {
-  assert(TII->isFLAT(MI));
-
-  // Flat instruction such as SCRATCH and GLOBAL do not use the lgkm counter.
-  if (!TII->usesLGKM_CNT(MI))
-    return false;
-
-  // If in tgsplit mode then there can be no use of LDS.
-  if (ST->isTgSplitEnabled())
-    return false;
-
-  // If there are no memory operands then conservatively assume the flat
-  // operation may access LDS.
-  if (MI.memoperands_empty())
-    return true;
-
-  // See if any memory operand specifies an address space that involves LDS.
-  for (const MachineMemOperand *Memop : MI.memoperands()) {
-    unsigned AS = Memop->getAddrSpace();
-    if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS)
-      return true;
-  }
-
-  return false;
-}
-
 bool SIInsertWaitcnts::isVmemAccess(const MachineInstr &MI) const {
-  return (TII->isFLAT(MI) && mayAccessVMEMThroughFlat(MI)) ||
+  return (TII->isFLAT(MI) && TII->mayAccessVMEMThroughFlat(MI)) ||
          (TII->isVMEM(MI) && !AMDGPU::getMUBUFIsBufferInv(MI.getOpcode()));
 }
 
-static bool isGFX12CacheInvOrWBInst(MachineInstr &Inst) {
-  auto Opc = Inst.getOpcode();
-  return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB ||
-         Opc == AMDGPU::GLOBAL_WBINV;
-}
-
 // Return true if the next instruction is S_ENDPGM, following fallthrough
 // blocks if necessary.
 bool SIInsertWaitcnts::isNextENDPGM(MachineBasicBlock::instr_iterator It,
@@ -2331,7 +2248,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
       ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
     }
   } else if (TII->isFLAT(Inst)) {
-    if (isGFX12CacheInvOrWBInst(Inst)) {
+    if (SIInstrInfo::isGFX12CacheInvOrWBInst(Inst.getOpcode())) {
       ScoreBrackets->updateByEvent(TII, TRI, MRI, getVmemWaitEventType(Inst),
                                    Inst);
       return;
@@ -2341,14 +2258,14 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
 
     int FlatASCount = 0;
 
-    if (mayAccessVMEMThroughFlat(Inst)) {
+    if (TII->mayAccessVMEMThroughFlat(Inst)) {
       ++FlatASCount;
       IsVMEMAccess = true;
       ScoreBrackets->updateByEvent(TII, TRI, MRI, getVmemWaitEventType(Inst),
                                    Inst);
     }
 
-    if (mayAccessLDSThroughFlat(Inst)) {
+    if (TII->mayAccessLDSThroughFlat(Inst)) {
       ++FlatASCount;
       ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
     }
@@ -2394,7 +2311,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
       ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_POS_ACCESS, Inst);
     else
       ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_GPR_LOCK, Inst);
-  } else if (asynchronouslyWritesSCC(Inst.getOpcode())) {
+  } else if (SIInstrInfo::isSBarrierSCCWrite(Inst.getOpcode())) {
     ScoreBrackets->updateByEvent(TII, TRI, MRI, SCC_WRITE, Inst);
   } else {
     switch (Inst.getOpcode()) {
@@ -2413,7 +2330,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
     }
   }
 
-  if (!hasXcnt())
+  if (!ST->hasWaitXCnt())
     return;
 
   if (IsVMEMAccess)
@@ -2516,12 +2433,6 @@ static bool isWaitInstr(MachineInstr &Inst) {
          counterTypeForInstr(Opcode).has_value();
 }
 
-bool SIInsertWaitcnts::asynchronouslyWritesSCC(unsigned Opcode) {
-  return Opcode == AMDGPU::S_BARRIER_LEAVE ||
-         Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM ||
-         Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0;
-}
-
 // Generate s_waitcnt instructions where needed.
 bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
                                             MachineBasicBlock &Block,
@@ -2578,7 +2489,7 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
     OldWaitcntInstr = nullptr;
 
     // Restore vccz if it's not known to be correct already.
-    bool RestoreVCCZ = !VCCZCorrect && readsVCCZ(Inst);
+    bool RestoreVCCZ = !VCCZCorrect && SIInstrInfo::isCBranchVCCZRead(Inst);
 
     // Don't examine operands unless we need to track vccz correctness.
     if (ST->hasReadVCCZBug() || !ST->partialVCCWritesUpdateVCCZ()) {
@@ -2701,7 +2612,7 @@ bool SIInsertWaitcnts::isPreheaderToFlush(
 
 bool SIInsertWaitcnts::isVMEMOrFlatVMEM(const MachineInstr &MI) const {
   if (SIInstrInfo::isFLAT(MI))
-    return mayAccessVMEMThroughFlat(MI);
+    return TII->mayAccessVMEMThroughFlat(MI);
   return SIInstrInfo::isVMEM(MI);
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 044ea866342c2..56435a50c87ad 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4344,6 +4344,59 @@ bool SIInstrInfo::mayAccessScratchThroughFlat(const MachineInstr &MI) const {
   });
 }
 
+bool SIInstrInfo::mayAccessVMEMThroughFlat(const MachineInstr &MI) const {
+  assert(isFLAT(MI));
+
+  // All flat instructions use the VMEM counter except prefetch.
+  if (!usesVM_CNT(MI))
+    return false;
+
+  // If there are no memory operands then conservatively assume the flat
+  // operation may access VMEM.
+  if (MI.memoperands_empty())
+    return true;
+
+  // See if any memory operand specifies an address space that involves VMEM.
+  // Flat operations only supported FLAT, LOCAL (LDS), or address spaces
+  // involving VMEM such as GLOBAL, CONSTANT, PRIVATE (SCRATCH), etc. The REGION
+  // (GDS) address space is not supported by flat operations. Therefore, simply
+  // return true unless only the LDS address space is found.
+  for (const MachineMemOperand *Memop : MI.memoperands()) {
+    unsigned AS = Memop->getAddrSpace();
+    assert(AS != AMDGPUAS::REGION_ADDRESS);
+    if (AS != AMDGPUAS::LOCAL_ADDRESS)
+      return true;
+  }
+
+  return false;
+}
+
+bool SIInstrInfo::mayAccessLDSThroughFlat(const MachineInstr &MI) const {
+  assert(isFLAT(MI));
+
+  // Flat instruction such as SCRATCH and GLOBAL do not use the lgkm counter.
+  if (!usesLGKM_CNT(MI))
+    return false;
+
+  // If in tgsplit mode then there can be no use of LDS.
+  if (ST.isTgSplitEnabled())
+    return false;
+
+  // If there are no memory operands then conservatively assume the flat
+  // operation may access LDS.
+  if (MI.memoperands_empty())
+    return true;
+
+  // See if any memory operand specifies an address space that involves LDS.
+  for (const MachineMemOperand *Memop : MI.memoperands()) {
+    unsigned AS = Memop->getAddrSpace();
+    if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS)
+      return true;
+  }
+
+  return false;
+}
+
 bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) {
   // Skip the full operand and register alias search modifiesRegister
   // does. There's only a handful of instructions that touch this, it's only an
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index c2252afdbb064..a21089f8e0fcc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -688,6 +688,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   /// to not hit scratch.
   bool mayAccessScratchThroughFlat(const MachineInstr &MI) const;
 
+  /// \returns true for FLAT instructions that can access VMEM.
+  bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const;
+
+  /// \returns true for FLAT instructions that can access LDS.
+  bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
+
   static bool isBlockLoadStore(uint16_t Opcode) {
     switch (Opcode) {
     case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
@@ -748,6 +754,18 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
   }
 
+  static bool isSBarrierSCCWrite(unsigned Opcode) {
+    return Opcode == AMDGPU::S_BARRIER_LEAVE ||
+           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM ||
+           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0;
+  }
+
+  static bool isCBranchVCCZRead(const MachineInstr &MI) {
+    unsigned Opc = MI.getOpcode();
+    return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) &&
+           !MI.getOperand(1).isUndef();
+  }
+
   static bool isWQM(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::WQM;
   }
@@ -1010,6 +1028,11 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
            Opcode == AMDGPU::DS_GWS_BARRIER;
   }
 
+  static bool isGFX12CacheInvOrWBInst(unsigned Opc) {
+    return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB ||
+           Opc == AMDGPU::GLOBAL_WBINV;
+  }
+
   static bool isF16PseudoScalarTrans(unsigned Opcode) {
     return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
            Opcode == AMDGPU::V_S_LOG_F16_e64 ||

>From d913c887d2fe069c5abffaba6a1e4e9a00b2d050 Mon Sep 17 00:00:00 2001
From: Pierre van Houtryve <pierre.vanhoutryve at amd.com>
Date: Wed, 1 Oct 2025 10:53:32 +0200
Subject: [PATCH 29/48] [AMDGPU][SIInsertWaitCnts] De-duplicate code (NFC)
 (#161161)

I'm reading through the pass over and over again to try and learn how it works. I noticed some code duplication here and there while doing that.
---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 64 ++++++++++-----------
 llvm/lib/Target/AMDGPU/SIInstrInfo.h        |  5 ++
 2 files changed, 34 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 91136fd85c545..3f9a1f492ace5 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1853,26 +1853,24 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   assert(!MI.isMetaInstruction());
 
   AMDGPU::Waitcnt Wait;
+  const unsigned Opc = MI.getOpcode();
 
   // FIXME: This should have already been handled by the memory legalizer.
   // Removing this currently doesn't affect any lit tests, but we need to
   // verify that nothing was relying on this. The number of buffer invalidates
   // being handled here should not be expanded.
-  if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
-      MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
-      MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL ||
-      MI.getOpcode() == AMDGPU::BUFFER_GL0_INV ||
-      MI.getOpcode() == AMDGPU::BUFFER_GL1_INV) {
+  if (Opc == AMDGPU::BUFFER_WBINVL1 || Opc == AMDGPU::BUFFER_WBINVL1_SC ||
+      Opc == AMDGPU::BUFFER_WBINVL1_VOL || Opc == AMDGPU::BUFFER_GL0_INV ||
+      Opc == AMDGPU::BUFFER_GL1_INV) {
     Wait.LoadCnt = 0;
   }
 
   // All waits must be resolved at call return.
   // NOTE: this could be improved with knowledge of all call sites or
   //   with knowledge of the called routines.
-  if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
-      MI.getOpcode() == AMDGPU::SI_RETURN ||
-      MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
-      MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
+  if (Opc == AMDGPU::SI_RETURN_TO_EPILOG || Opc == AMDGPU::SI_RETURN ||
+      Opc == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
+      Opc == AMDGPU::S_SETPC_B64_return ||
       (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
     Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
   }
@@ -1884,8 +1882,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   // send a message to explicitly release all VGPRs before the stores have
   // completed, but it is only safe to do this if there are no outstanding
   // scratch stores.
-  else if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
-           MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
+  else if (Opc == AMDGPU::S_ENDPGM || Opc == AMDGPU::S_ENDPGM_SAVED) {
     if (!WCG->isOptNone() &&
         (MI.getMF()->getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() ||
          (ST->getGeneration() >= AMDGPUSubtarget::GFX11 &&
@@ -1894,8 +1891,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
       ReleaseVGPRInsts.insert(&MI);
   }
   // Resolve vm waits before gs-done.
-  else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
-            MI.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
+  else if ((Opc == AMDGPU::S_SENDMSG || Opc == AMDGPU::S_SENDMSGHALT) &&
            ST->hasLegacyGeometry() &&
            ((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_PreGFX11_) ==
             AMDGPU::SendMsg::ID_GS_DONE_PreGFX11)) {
@@ -1920,7 +1916,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
 
     // Wait for any pending GDS instruction to complete before any
     // "Always GDS" instruction.
-    if (TII->isAlwaysGDS(MI.getOpcode()) && ScoreBrackets.hasPendingGDS())
+    if (TII->isAlwaysGDS(Opc) && ScoreBrackets.hasPendingGDS())
       addWait(Wait, DS_CNT, ScoreBrackets.getPendingGDSWait());
 
     if (MI.isCall() && callWaitsOnFunctionEntry(MI)) {
@@ -1946,7 +1942,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
                                       Wait);
         }
       }
-    } else if (MI.getOpcode() == AMDGPU::S_BARRIER_WAIT) {
+    } else if (Opc == AMDGPU::S_BARRIER_WAIT) {
       ScoreBrackets.tryClearSCCWriteEvent(&MI);
     } else {
       // FIXME: Should not be relying on memoperands.
@@ -2061,8 +2057,8 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   //
   // In all other cases, ensure safety by ensuring that there are no outstanding
   // memory operations.
-  if (MI.getOpcode() == AMDGPU::S_BARRIER &&
-      !ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) {
+  if (Opc == AMDGPU::S_BARRIER && !ST->hasAutoWaitcntBeforeBarrier() &&
+      !ST->supportsBackOffBarrier()) {
     Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/true));
   }
 
@@ -2146,19 +2142,19 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
   }
 
   // XCnt may be already consumed by a load wait.
-  if (Wait.KmCnt == 0 && Wait.XCnt != ~0u &&
-      !ScoreBrackets.hasPendingEvent(SMEM_GROUP))
-    Wait.XCnt = ~0u;
+  if (Wait.XCnt != ~0u) {
+    if (Wait.KmCnt == 0 && !ScoreBrackets.hasPendingEvent(SMEM_GROUP))
+      Wait.XCnt = ~0u;
 
-  if (Wait.LoadCnt == 0 && Wait.XCnt != ~0u &&
-      !ScoreBrackets.hasPendingEvent(VMEM_GROUP))
-    Wait.XCnt = ~0u;
+    if (Wait.LoadCnt == 0 && !ScoreBrackets.hasPendingEvent(VMEM_GROUP))
+      Wait.XCnt = ~0u;
 
-  // Since the translation for VMEM addresses occur in-order, we can skip the
-  // XCnt if the current instruction is of VMEM type and has a memory dependency
-  // with another VMEM instruction in flight.
-  if (Wait.XCnt != ~0u && isVmemAccess(*It))
-    Wait.XCnt = ~0u;
+    // Since the translation for VMEM addresses occur in-order, we can skip the
+    // XCnt if the current instruction is of VMEM type and has a memory
+    // dependency with another VMEM instruction in flight.
+    if (isVmemAccess(*It))
+      Wait.XCnt = ~0u;
+  }
 
   if (WCG->createNewWaitcnt(Block, It, Wait))
     Modified = true;
@@ -2395,9 +2391,8 @@ bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
         unsigned OldEventsHasSCCWrite = OldEvents & (1 << SCC_WRITE);
         if (!OldEventsHasSCCWrite) {
           PendingSCCWrite = Other.PendingSCCWrite;
-        } else {
-          if (PendingSCCWrite != Other.PendingSCCWrite)
-            PendingSCCWrite = nullptr;
+        } else if (PendingSCCWrite != Other.PendingSCCWrite) {
+          PendingSCCWrite = nullptr;
         }
       }
     }
@@ -2635,11 +2630,10 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
   for (MachineBasicBlock *MBB : ML->blocks()) {
     for (MachineInstr &MI : *MBB) {
       if (isVMEMOrFlatVMEM(MI)) {
-        if (MI.mayLoad())
-          HasVMemLoad = true;
-        if (MI.mayStore())
-          HasVMemStore = true;
+        HasVMemLoad |= MI.mayLoad();
+        HasVMemStore |= MI.mayStore();
       }
+
       for (const MachineOperand &Op : MI.all_uses()) {
         if (Op.isDebug() || !TRI->isVectorRegister(*MRI, Op.getReg()))
           continue;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index a21089f8e0fcc..754f52a28e710 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1033,6 +1033,11 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
            Opc == AMDGPU::GLOBAL_WBINV;
   }
 
+  static bool isGFX12CacheInvOrWBInst(unsigned Opc) {
+    return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB ||
+           Opc == AMDGPU::GLOBAL_WBINV;
+  }
+
   static bool isF16PseudoScalarTrans(unsigned Opcode) {
     return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
            Opcode == AMDGPU::V_S_LOG_F16_e64 ||

>From 012aa090589b63489d55597fbea8efccf37183f3 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 1 Oct 2025 10:06:01 +0100
Subject: [PATCH 30/48] [DAGCombine] Support (shl %x, constant) in
 foldPartialReduceMLAMulOp. (#160663)

Support shifts in foldPartialReduceMLAMulOp by treating (shl %x, %c) as
(mul %x, (shl 1, %c)).

PR: https://github.com/llvm/llvm-project/pull/160663
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 24 +++++-
 .../neon-partial-reduce-dot-product.ll        | 86 +++++++++++++++----
 2 files changed, 88 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 204e1f0c75e00..558c5a0390228 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12994,13 +12994,31 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
   SDValue Op1 = N->getOperand(1);
   SDValue Op2 = N->getOperand(2);
 
-  APInt C;
-  if (Op1->getOpcode() != ISD::MUL ||
-      !ISD::isConstantSplatVector(Op2.getNode(), C) || !C.isOne())
+  unsigned Opc = Op1->getOpcode();
+  if (Opc != ISD::MUL && Opc != ISD::SHL)
     return SDValue();
 
   SDValue LHS = Op1->getOperand(0);
   SDValue RHS = Op1->getOperand(1);
+
+  // Try to treat (shl %a, %c) as (mul %a, (1 << %c)) for constant %c.
+  if (Opc == ISD::SHL) {
+    APInt C;
+    if (!ISD::isConstantSplatVector(RHS.getNode(), C))
+      return SDValue();
+
+    RHS =
+        DAG.getSplatVector(RHS.getValueType(), DL,
+                           DAG.getConstant(APInt(C.getBitWidth(), 1).shl(C), DL,
+                                           RHS.getValueType().getScalarType()));
+    Opc = ISD::MUL;
+  }
+
+  APInt C;
+  if (Opc != ISD::MUL || !ISD::isConstantSplatVector(Op2.getNode(), C) ||
+      !C.isOne())
+    return SDValue();
+
   unsigned LHSOpcode = LHS->getOpcode();
   if (!ISD::isExtOpcode(LHSOpcode))
     return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
index d60c870003e4d..428750740fc56 100644
--- a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
+++ b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
@@ -1257,21 +1257,55 @@ entry:
 }
 
 define <4 x i32> @partial_reduce_shl_sext_const_rhs6(<16 x i8> %l, <4 x i32> %part) {
-; CHECK-COMMON-LABEL: partial_reduce_shl_sext_const_rhs6:
+; CHECK-NODOT-LABEL: partial_reduce_shl_sext_const_rhs6:
+; CHECK-NODOT:       // %bb.0:
+; CHECK-NODOT-NEXT:    sshll v2.8h, v0.8b, #0
+; CHECK-NODOT-NEXT:    sshll2 v0.8h, v0.16b, #0
+; CHECK-NODOT-NEXT:    sshll v3.4s, v0.4h, #6
+; CHECK-NODOT-NEXT:    sshll2 v4.4s, v2.8h, #6
+; CHECK-NODOT-NEXT:    sshll v2.4s, v2.4h, #6
+; CHECK-NODOT-NEXT:    sshll2 v0.4s, v0.8h, #6
+; CHECK-NODOT-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NODOT-NEXT:    add v2.4s, v4.4s, v3.4s
+; CHECK-NODOT-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NODOT-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NODOT-NEXT:    ret
+;
+; CHECK-DOT-LABEL: partial_reduce_shl_sext_const_rhs6:
+; CHECK-DOT:       // %bb.0:
+; CHECK-DOT-NEXT:    movi v2.16b, #64
+; CHECK-DOT-NEXT:    sdot v1.4s, v0.16b, v2.16b
+; CHECK-DOT-NEXT:    mov v0.16b, v1.16b
+; CHECK-DOT-NEXT:    ret
+;
+; CHECK-DOT-I8MM-LABEL: partial_reduce_shl_sext_const_rhs6:
+; CHECK-DOT-I8MM:       // %bb.0:
+; CHECK-DOT-I8MM-NEXT:    movi v2.16b, #64
+; CHECK-DOT-I8MM-NEXT:    sdot v1.4s, v0.16b, v2.16b
+; CHECK-DOT-I8MM-NEXT:    mov v0.16b, v1.16b
+; CHECK-DOT-I8MM-NEXT:    ret
+  %ext = sext <16 x i8> %l to <16 x i32>
+  %shift = shl nsw <16 x i32> %ext, splat (i32 6)
+  %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift)
+  ret <4 x i32> %red
+}
+
+define <4 x i32> @partial_reduce_shl_sext_const_rhs7(<16 x i8> %l, <4 x i32> %part) {
+; CHECK-COMMON-LABEL: partial_reduce_shl_sext_const_rhs7:
 ; CHECK-COMMON:       // %bb.0:
 ; CHECK-COMMON-NEXT:    sshll v2.8h, v0.8b, #0
 ; CHECK-COMMON-NEXT:    sshll2 v0.8h, v0.16b, #0
-; CHECK-COMMON-NEXT:    sshll v3.4s, v0.4h, #6
-; CHECK-COMMON-NEXT:    sshll2 v4.4s, v2.8h, #6
-; CHECK-COMMON-NEXT:    sshll v2.4s, v2.4h, #6
-; CHECK-COMMON-NEXT:    sshll2 v0.4s, v0.8h, #6
+; CHECK-COMMON-NEXT:    sshll v3.4s, v0.4h, #7
+; CHECK-COMMON-NEXT:    sshll2 v4.4s, v2.8h, #7
+; CHECK-COMMON-NEXT:    sshll v2.4s, v2.4h, #7
+; CHECK-COMMON-NEXT:    sshll2 v0.4s, v0.8h, #7
 ; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
 ; CHECK-COMMON-NEXT:    add v2.4s, v4.4s, v3.4s
 ; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
 ; CHECK-COMMON-NEXT:    add v0.4s, v1.4s, v0.4s
 ; CHECK-COMMON-NEXT:    ret
   %ext = sext <16 x i8> %l to <16 x i32>
-  %shift = shl nsw <16 x i32> %ext, splat (i32 6)
+  %shift = shl nsw <16 x i32> %ext, splat (i32 7)
   %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift)
   ret <4 x i32> %red
 }
@@ -1331,19 +1365,33 @@ define <4 x i32> @partial_reduce_shl_sext_non_const_rhs(<16 x i8> %l, <4 x i32>
 }
 
 define <4 x i32> @partial_reduce_shl_zext_const_rhs6(<16 x i8> %l, <4 x i32> %part) {
-; CHECK-COMMON-LABEL: partial_reduce_shl_zext_const_rhs6:
-; CHECK-COMMON:       // %bb.0:
-; CHECK-COMMON-NEXT:    ushll v2.8h, v0.8b, #0
-; CHECK-COMMON-NEXT:    ushll2 v0.8h, v0.16b, #0
-; CHECK-COMMON-NEXT:    ushll v3.4s, v0.4h, #6
-; CHECK-COMMON-NEXT:    ushll2 v4.4s, v2.8h, #6
-; CHECK-COMMON-NEXT:    ushll v2.4s, v2.4h, #6
-; CHECK-COMMON-NEXT:    ushll2 v0.4s, v0.8h, #6
-; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-COMMON-NEXT:    add v2.4s, v4.4s, v3.4s
-; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-COMMON-NEXT:    add v0.4s, v1.4s, v0.4s
-; CHECK-COMMON-NEXT:    ret
+; CHECK-NODOT-LABEL: partial_reduce_shl_zext_const_rhs6:
+; CHECK-NODOT:       // %bb.0:
+; CHECK-NODOT-NEXT:    ushll v2.8h, v0.8b, #0
+; CHECK-NODOT-NEXT:    ushll2 v0.8h, v0.16b, #0
+; CHECK-NODOT-NEXT:    ushll v3.4s, v0.4h, #6
+; CHECK-NODOT-NEXT:    ushll2 v4.4s, v2.8h, #6
+; CHECK-NODOT-NEXT:    ushll v2.4s, v2.4h, #6
+; CHECK-NODOT-NEXT:    ushll2 v0.4s, v0.8h, #6
+; CHECK-NODOT-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NODOT-NEXT:    add v2.4s, v4.4s, v3.4s
+; CHECK-NODOT-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-NODOT-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NODOT-NEXT:    ret
+;
+; CHECK-DOT-LABEL: partial_reduce_shl_zext_const_rhs6:
+; CHECK-DOT:       // %bb.0:
+; CHECK-DOT-NEXT:    movi v2.16b, #64
+; CHECK-DOT-NEXT:    udot v1.4s, v0.16b, v2.16b
+; CHECK-DOT-NEXT:    mov v0.16b, v1.16b
+; CHECK-DOT-NEXT:    ret
+;
+; CHECK-DOT-I8MM-LABEL: partial_reduce_shl_zext_const_rhs6:
+; CHECK-DOT-I8MM:       // %bb.0:
+; CHECK-DOT-I8MM-NEXT:    movi v2.16b, #64
+; CHECK-DOT-I8MM-NEXT:    udot v1.4s, v0.16b, v2.16b
+; CHECK-DOT-I8MM-NEXT:    mov v0.16b, v1.16b
+; CHECK-DOT-I8MM-NEXT:    ret
   %ext = zext <16 x i8> %l to <16 x i32>
   %shift = shl nsw <16 x i32> %ext, splat (i32 6)
   %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift)

>From 7e4cc3147e332c7d4b4cabe0daa4ff0a884b8f00 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Wed, 1 Oct 2025 11:08:29 +0200
Subject: [PATCH 31/48] [AMDGPU] Remove duplicate definition of
 isGFX12CacheInvOrWBInst

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 754f52a28e710..a21089f8e0fcc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1033,11 +1033,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
            Opc == AMDGPU::GLOBAL_WBINV;
   }
 
-  static bool isGFX12CacheInvOrWBInst(unsigned Opc) {
-    return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB ||
-           Opc == AMDGPU::GLOBAL_WBINV;
-  }
-
   static bool isF16PseudoScalarTrans(unsigned Opcode) {
     return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
            Opcode == AMDGPU::V_S_LOG_F16_e64 ||

>From a876141243f1bdc7c532a1ed47cdc46af96a9c6a Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 1 Oct 2025 11:06:36 +0100
Subject: [PATCH 32/48] Fix MSVC "result of 32-bit shift implicitly converted
 to 64 bits" warning. NFC. (#161496)

---
 llvm/lib/CAS/OnDiskTrieRawHashMap.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
index 9b382dd749ea5..940389336ce22 100644
--- a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
+++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
@@ -114,7 +114,7 @@ class SubtrieHandle {
   using SlotT = std::atomic<int64_t>;
 
   static int64_t getSlotsSize(uint32_t NumBits) {
-    return sizeof(int64_t) * (1u << NumBits);
+    return sizeof(int64_t) * (1ull << NumBits);
   }
 
   static int64_t getSize(uint32_t NumBits) {
@@ -191,7 +191,8 @@ class SubtrieHandle {
   MutableArrayRef<SlotT> Slots;
 
   static MutableArrayRef<SlotT> getSlots(Header &H) {
-    return MutableArrayRef(reinterpret_cast<SlotT *>(&H + 1), 1u << H.NumBits);
+    return MutableArrayRef(reinterpret_cast<SlotT *>(&H + 1),
+                           1ull << H.NumBits);
   }
 };
 

>From f0a3fd3ffe36628ff8c1335a78d029233162dae8 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Wed, 1 Oct 2025 10:29:21 +0100
Subject: [PATCH 33/48] [lldb][Mangled][NFC] Remove redundant const-qualifier
 on llvm::StringRef argument

---
 lldb/include/lldb/Core/Mangled.h | 2 +-
 lldb/source/Core/Mangled.cpp     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lldb/include/lldb/Core/Mangled.h b/lldb/include/lldb/Core/Mangled.h
index 47f1c6a8d80b7..665accb3119e3 100644
--- a/lldb/include/lldb/Core/Mangled.h
+++ b/lldb/include/lldb/Core/Mangled.h
@@ -251,7 +251,7 @@ class Mangled {
   /// \return
   ///     eManglingSchemeNone if no known mangling scheme could be identified
   ///     for s, otherwise the enumerator for the mangling scheme detected.
-  static Mangled::ManglingScheme GetManglingScheme(llvm::StringRef const name);
+  static Mangled::ManglingScheme GetManglingScheme(llvm::StringRef name);
 
   static bool IsMangledName(llvm::StringRef name);
 
diff --git a/lldb/source/Core/Mangled.cpp b/lldb/source/Core/Mangled.cpp
index 91b9c0007617d..0780846b0ed60 100644
--- a/lldb/source/Core/Mangled.cpp
+++ b/lldb/source/Core/Mangled.cpp
@@ -40,7 +40,7 @@ bool Mangled::IsMangledName(llvm::StringRef name) {
   return Mangled::GetManglingScheme(name) != Mangled::eManglingSchemeNone;
 }
 
-Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
+Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef name) {
   if (name.empty())
     return Mangled::eManglingSchemeNone;
 

>From 6ce16352131b974469b1a489c3e5afe55b659e33 Mon Sep 17 00:00:00 2001
From: Pierre van Houtryve <pierre.vanhoutryve at amd.com>
Date: Wed, 1 Oct 2025 12:08:12 +0200
Subject: [PATCH 34/48] [AMDGPU][SIInsertWaitCnts] Remove redundant TII/TRI/MRI
 arguments (NFC) (#161357)

WaitcntBrackets already has a pointer to its SIInsertWaitcnts instance.
With a small change, it can directly access TII/TRI/MRI that way.
This simplifies a lot of call sites, which makes the code easier to
follow.
---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 121 +++++++++-----------
 1 file changed, 54 insertions(+), 67 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 3f9a1f492ace5..76bfce8c0f6f9 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -418,15 +418,14 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
 class SIInsertWaitcnts {
 public:
   const GCNSubtarget *ST;
+  const SIInstrInfo *TII = nullptr;
+  const SIRegisterInfo *TRI = nullptr;
+  const MachineRegisterInfo *MRI = nullptr;
   InstCounterType SmemAccessCounter;
   InstCounterType MaxCounter;
   const unsigned *WaitEventMaskForInst;
 
 private:
-  const SIInstrInfo *TII = nullptr;
-  const SIRegisterInfo *TRI = nullptr;
-  const MachineRegisterInfo *MRI = nullptr;
-
   DenseMap<const Value *, MachineBasicBlock *> SLoadAddresses;
   DenseMap<MachineBasicBlock *, bool> PreheadersToFlush;
   MachineLoopInfo *MLI;
@@ -631,8 +630,6 @@ class WaitcntBrackets {
   bool merge(const WaitcntBrackets &Other);
 
   RegInterval getRegInterval(const MachineInstr *MI,
-                             const MachineRegisterInfo *MRI,
-                             const SIRegisterInfo *TRI,
                              const MachineOperand &Op) const;
 
   bool counterOutOfOrder(InstCounterType T) const;
@@ -650,9 +647,7 @@ class WaitcntBrackets {
   void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
   void applyWaitcnt(InstCounterType T, unsigned Count);
   void applyXcnt(const AMDGPU::Waitcnt &Wait);
-  void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI,
-                     const MachineRegisterInfo *MRI, WaitEventType E,
-                     MachineInstr &MI);
+  void updateByEvent(WaitEventType E, MachineInstr &MI);
 
   unsigned hasPendingEvent() const { return PendingEvents; }
   unsigned hasPendingEvent(WaitEventType E) const {
@@ -761,10 +756,8 @@ class WaitcntBrackets {
   void setScoreByInterval(RegInterval Interval, InstCounterType CntTy,
                           unsigned Score);
 
-  void setScoreByOperand(const MachineInstr *MI, const SIRegisterInfo *TRI,
-                         const MachineRegisterInfo *MRI,
-                         const MachineOperand &Op, InstCounterType CntTy,
-                         unsigned Val);
+  void setScoreByOperand(const MachineInstr *MI, const MachineOperand &Op,
+                         InstCounterType CntTy, unsigned Val);
 
   const SIInsertWaitcnts *Context;
 
@@ -821,12 +814,13 @@ class SIInsertWaitcntsLegacy : public MachineFunctionPass {
 } // end anonymous namespace
 
 RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
-                                            const MachineRegisterInfo *MRI,
-                                            const SIRegisterInfo *TRI,
                                             const MachineOperand &Op) const {
   if (Op.getReg() == AMDGPU::SCC)
     return {SCC, SCC + 1};
 
+  const SIRegisterInfo *TRI = Context->TRI;
+  const MachineRegisterInfo *MRI = Context->MRI;
+
   if (!TRI->isInAllocatableClass(Op.getReg()))
     return {-1, -1};
 
@@ -891,11 +885,9 @@ void WaitcntBrackets::setScoreByInterval(RegInterval Interval,
 }
 
 void WaitcntBrackets::setScoreByOperand(const MachineInstr *MI,
-                                        const SIRegisterInfo *TRI,
-                                        const MachineRegisterInfo *MRI,
                                         const MachineOperand &Op,
                                         InstCounterType CntTy, unsigned Score) {
-  RegInterval Interval = getRegInterval(MI, MRI, TRI, Op);
+  RegInterval Interval = getRegInterval(MI, Op);
   setScoreByInterval(Interval, CntTy, Score);
 }
 
@@ -927,10 +919,7 @@ bool WaitcntBrackets::hasPointSamplePendingVmemTypes(
   return hasOtherPendingVmemTypes(Interval, VMEM_NOSAMPLER);
 }
 
-void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
-                                    const SIRegisterInfo *TRI,
-                                    const MachineRegisterInfo *MRI,
-                                    WaitEventType E, MachineInstr &Inst) {
+void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) {
   InstCounterType T = eventCounter(Context->WaitEventMaskForInst, E);
 
   unsigned UB = getScoreUB(T);
@@ -943,6 +932,10 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
   PendingEvents |= 1 << E;
   setScoreUB(T, CurrScore);
 
+  const SIRegisterInfo *TRI = Context->TRI;
+  const MachineRegisterInfo *MRI = Context->MRI;
+  const SIInstrInfo *TII = Context->TII;
+
   if (T == EXP_CNT) {
     // Put score on the source vgprs. If this is a store, just use those
     // specific register(s).
@@ -950,59 +943,56 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
       // All GDS operations must protect their address register (same as
       // export.)
       if (const auto *AddrOp = TII->getNamedOperand(Inst, AMDGPU::OpName::addr))
-        setScoreByOperand(&Inst, TRI, MRI, *AddrOp, EXP_CNT, CurrScore);
+        setScoreByOperand(&Inst, *AddrOp, EXP_CNT, CurrScore);
 
       if (Inst.mayStore()) {
         if (const auto *Data0 =
                 TII->getNamedOperand(Inst, AMDGPU::OpName::data0))
-          setScoreByOperand(&Inst, TRI, MRI, *Data0, EXP_CNT, CurrScore);
+          setScoreByOperand(&Inst, *Data0, EXP_CNT, CurrScore);
         if (const auto *Data1 =
                 TII->getNamedOperand(Inst, AMDGPU::OpName::data1))
-          setScoreByOperand(&Inst, TRI, MRI, *Data1, EXP_CNT, CurrScore);
+          setScoreByOperand(&Inst, *Data1, EXP_CNT, CurrScore);
       } else if (SIInstrInfo::isAtomicRet(Inst) && !SIInstrInfo::isGWS(Inst) &&
                  Inst.getOpcode() != AMDGPU::DS_APPEND &&
                  Inst.getOpcode() != AMDGPU::DS_CONSUME &&
                  Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
         for (const MachineOperand &Op : Inst.all_uses()) {
           if (TRI->isVectorRegister(*MRI, Op.getReg()))
-            setScoreByOperand(&Inst, TRI, MRI, Op, EXP_CNT, CurrScore);
+            setScoreByOperand(&Inst, Op, EXP_CNT, CurrScore);
         }
       }
     } else if (TII->isFLAT(Inst)) {
       if (Inst.mayStore()) {
-        setScoreByOperand(&Inst, TRI, MRI,
+        setScoreByOperand(&Inst,
                           *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
                           EXP_CNT, CurrScore);
       } else if (SIInstrInfo::isAtomicRet(Inst)) {
-        setScoreByOperand(&Inst, TRI, MRI,
+        setScoreByOperand(&Inst,
                           *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
                           EXP_CNT, CurrScore);
       }
     } else if (TII->isMIMG(Inst)) {
       if (Inst.mayStore()) {
-        setScoreByOperand(&Inst, TRI, MRI, Inst.getOperand(0), EXP_CNT,
-                          CurrScore);
+        setScoreByOperand(&Inst, Inst.getOperand(0), EXP_CNT, CurrScore);
       } else if (SIInstrInfo::isAtomicRet(Inst)) {
-        setScoreByOperand(&Inst, TRI, MRI,
+        setScoreByOperand(&Inst,
                           *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
                           EXP_CNT, CurrScore);
       }
     } else if (TII->isMTBUF(Inst)) {
       if (Inst.mayStore())
-        setScoreByOperand(&Inst, TRI, MRI, Inst.getOperand(0), EXP_CNT,
-                          CurrScore);
+        setScoreByOperand(&Inst, Inst.getOperand(0), EXP_CNT, CurrScore);
     } else if (TII->isMUBUF(Inst)) {
       if (Inst.mayStore()) {
-        setScoreByOperand(&Inst, TRI, MRI, Inst.getOperand(0), EXP_CNT,
-                          CurrScore);
+        setScoreByOperand(&Inst, Inst.getOperand(0), EXP_CNT, CurrScore);
       } else if (SIInstrInfo::isAtomicRet(Inst)) {
-        setScoreByOperand(&Inst, TRI, MRI,
+        setScoreByOperand(&Inst,
                           *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
                           EXP_CNT, CurrScore);
       }
     } else if (TII->isLDSDIR(Inst)) {
       // LDSDIR instructions attach the score to the destination.
-      setScoreByOperand(&Inst, TRI, MRI,
+      setScoreByOperand(&Inst,
                         *TII->getNamedOperand(Inst, AMDGPU::OpName::vdst),
                         EXP_CNT, CurrScore);
     } else {
@@ -1013,18 +1003,18 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
         // score.
         for (MachineOperand &DefMO : Inst.all_defs()) {
           if (TRI->isVGPR(*MRI, DefMO.getReg())) {
-            setScoreByOperand(&Inst, TRI, MRI, DefMO, EXP_CNT, CurrScore);
+            setScoreByOperand(&Inst, DefMO, EXP_CNT, CurrScore);
           }
         }
       }
       for (const MachineOperand &Op : Inst.all_uses()) {
         if (TRI->isVectorRegister(*MRI, Op.getReg()))
-          setScoreByOperand(&Inst, TRI, MRI, Op, EXP_CNT, CurrScore);
+          setScoreByOperand(&Inst, Op, EXP_CNT, CurrScore);
       }
     }
   } else if (T == X_CNT) {
     for (const MachineOperand &Op : Inst.all_uses())
-      setScoreByOperand(&Inst, TRI, MRI, Op, T, CurrScore);
+      setScoreByOperand(&Inst, Op, T, CurrScore);
   } else /* LGKM_CNT || EXP_CNT || VS_CNT || NUM_INST_CNTS */ {
     // Match the score to the destination registers.
     //
@@ -1036,7 +1026,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
     // Special cases where implicit register defs exists, such as M0 or VCC,
     // but none with memory instructions.
     for (const MachineOperand &Op : Inst.defs()) {
-      RegInterval Interval = getRegInterval(&Inst, MRI, TRI, Op);
+      RegInterval Interval = getRegInterval(&Inst, Op);
       if (T == LOAD_CNT || T == SAMPLE_CNT || T == BVH_CNT) {
         if (Interval.first >= NUM_ALL_VGPRS)
           continue;
@@ -1928,7 +1918,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
       const auto &CallAddrOp = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
       if (CallAddrOp.isReg()) {
         RegInterval CallAddrOpInterval =
-            ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOp);
+            ScoreBrackets.getRegInterval(&MI, CallAddrOp);
 
         ScoreBrackets.determineWait(SmemAccessCounter, CallAddrOpInterval,
                                     Wait);
@@ -1936,7 +1926,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
         if (const auto *RtnAddrOp =
                 TII->getNamedOperand(MI, AMDGPU::OpName::dst)) {
           RegInterval RtnAddrOpInterval =
-              ScoreBrackets.getRegInterval(&MI, MRI, TRI, *RtnAddrOp);
+              ScoreBrackets.getRegInterval(&MI, *RtnAddrOp);
 
           ScoreBrackets.determineWait(SmemAccessCounter, RtnAddrOpInterval,
                                       Wait);
@@ -2000,7 +1990,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
         if (Op.isTied() && Op.isUse() && TII->doesNotReadTiedSource(MI))
           continue;
 
-        RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, Op);
+        RegInterval Interval = ScoreBrackets.getRegInterval(&MI, Op);
 
         const bool IsVGPR = TRI->isVectorRegister(*MRI, Op.getReg());
         if (IsVGPR) {
@@ -2237,16 +2227,15 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
   if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) {
     if (TII->isAlwaysGDS(Inst.getOpcode()) ||
         TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
-      ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
-      ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
+      ScoreBrackets->updateByEvent(GDS_ACCESS, Inst);
+      ScoreBrackets->updateByEvent(GDS_GPR_LOCK, Inst);
       ScoreBrackets->setPendingGDS();
     } else {
-      ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
+      ScoreBrackets->updateByEvent(LDS_ACCESS, Inst);
     }
   } else if (TII->isFLAT(Inst)) {
     if (SIInstrInfo::isGFX12CacheInvOrWBInst(Inst.getOpcode())) {
-      ScoreBrackets->updateByEvent(TII, TRI, MRI, getVmemWaitEventType(Inst),
-                                   Inst);
+      ScoreBrackets->updateByEvent(getVmemWaitEventType(Inst), Inst);
       return;
     }
 
@@ -2257,13 +2246,12 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
     if (TII->mayAccessVMEMThroughFlat(Inst)) {
       ++FlatASCount;
       IsVMEMAccess = true;
-      ScoreBrackets->updateByEvent(TII, TRI, MRI, getVmemWaitEventType(Inst),
-                                   Inst);
+      ScoreBrackets->updateByEvent(getVmemWaitEventType(Inst), Inst);
     }
 
     if (TII->mayAccessLDSThroughFlat(Inst)) {
       ++FlatASCount;
-      ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
+      ScoreBrackets->updateByEvent(LDS_ACCESS, Inst);
     }
 
     // This is a flat memory operation that access both VMEM and LDS, so note it
@@ -2274,16 +2262,15 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
   } else if (SIInstrInfo::isVMEM(Inst) &&
              !llvm::AMDGPU::getMUBUFIsBufferInv(Inst.getOpcode())) {
     IsVMEMAccess = true;
-    ScoreBrackets->updateByEvent(TII, TRI, MRI, getVmemWaitEventType(Inst),
-                                 Inst);
+    ScoreBrackets->updateByEvent(getVmemWaitEventType(Inst), Inst);
 
     if (ST->vmemWriteNeedsExpWaitcnt() &&
         (Inst.mayStore() || SIInstrInfo::isAtomicRet(Inst))) {
-      ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
+      ScoreBrackets->updateByEvent(VMW_GPR_LOCK, Inst);
     }
   } else if (TII->isSMRD(Inst)) {
     IsSMEMAccess = true;
-    ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_ACCESS, Inst);
+    ScoreBrackets->updateByEvent(SMEM_ACCESS, Inst);
   } else if (Inst.isCall()) {
     if (callWaitsOnFunctionReturn(Inst)) {
       // Act as a wait on everything
@@ -2295,33 +2282,33 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
       ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt());
     }
   } else if (SIInstrInfo::isLDSDIR(Inst)) {
-    ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_LDS_ACCESS, Inst);
+    ScoreBrackets->updateByEvent(EXP_LDS_ACCESS, Inst);
   } else if (TII->isVINTERP(Inst)) {
     int64_t Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::waitexp)->getImm();
     ScoreBrackets->applyWaitcnt(EXP_CNT, Imm);
   } else if (SIInstrInfo::isEXP(Inst)) {
     unsigned Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm();
     if (Imm >= AMDGPU::Exp::ET_PARAM0 && Imm <= AMDGPU::Exp::ET_PARAM31)
-      ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_PARAM_ACCESS, Inst);
+      ScoreBrackets->updateByEvent(EXP_PARAM_ACCESS, Inst);
     else if (Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST)
-      ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_POS_ACCESS, Inst);
+      ScoreBrackets->updateByEvent(EXP_POS_ACCESS, Inst);
     else
-      ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_GPR_LOCK, Inst);
+      ScoreBrackets->updateByEvent(EXP_GPR_LOCK, Inst);
   } else if (SIInstrInfo::isSBarrierSCCWrite(Inst.getOpcode())) {
-    ScoreBrackets->updateByEvent(TII, TRI, MRI, SCC_WRITE, Inst);
+    ScoreBrackets->updateByEvent(SCC_WRITE, Inst);
   } else {
     switch (Inst.getOpcode()) {
     case AMDGPU::S_SENDMSG:
     case AMDGPU::S_SENDMSG_RTN_B32:
     case AMDGPU::S_SENDMSG_RTN_B64:
     case AMDGPU::S_SENDMSGHALT:
-      ScoreBrackets->updateByEvent(TII, TRI, MRI, SQ_MESSAGE, Inst);
+      ScoreBrackets->updateByEvent(SQ_MESSAGE, Inst);
       break;
     case AMDGPU::S_MEMTIME:
     case AMDGPU::S_MEMREALTIME:
     case AMDGPU::S_GET_BARRIER_STATE_M0:
     case AMDGPU::S_GET_BARRIER_STATE_IMM:
-      ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_ACCESS, Inst);
+      ScoreBrackets->updateByEvent(SMEM_ACCESS, Inst);
       break;
     }
   }
@@ -2330,10 +2317,10 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
     return;
 
   if (IsVMEMAccess)
-    ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_GROUP, Inst);
+    ScoreBrackets->updateByEvent(VMEM_GROUP, Inst);
 
   if (IsSMEMAccess)
-    ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_GROUP, Inst);
+    ScoreBrackets->updateByEvent(SMEM_GROUP, Inst);
 }
 
 bool WaitcntBrackets::mergeScore(const MergeInfo &M, unsigned &Score,
@@ -2637,7 +2624,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
       for (const MachineOperand &Op : MI.all_uses()) {
         if (Op.isDebug() || !TRI->isVectorRegister(*MRI, Op.getReg()))
           continue;
-        RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
+        RegInterval Interval = Brackets.getRegInterval(&MI, Op);
         // Vgpr use
         for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
           // If we find a register that is loaded inside the loop, 1. and 2.
@@ -2662,7 +2649,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
       // VMem load vgpr def
       if (isVMEMOrFlatVMEM(MI) && MI.mayLoad()) {
         for (const MachineOperand &Op : MI.all_defs()) {
-          RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
+          RegInterval Interval = Brackets.getRegInterval(&MI, Op);
           for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
             // If we find a register that is loaded inside the loop, 1. and 2.
             // are invalidated and we can exit.

>From f29d6fed5199e93055697a4a5482d373d3248804 Mon Sep 17 00:00:00 2001
From: Timur Golubovich <timur.golubovich at syntacore.com>
Date: Wed, 1 Oct 2025 13:20:58 +0300
Subject: [PATCH 35/48] [lldb][TypeSystemClang] Added unique builtins types for
 __bf16 and _Float16 (#157674)

While debugging an application with __bf16 and _Float16 float types it was
discovered that lldb creates the same CompilerType for them. This can
cause an infinite recursion error, if one tries to create two struct
specializations with these types and then inherit one specialization
from another.
---
 .../TypeSystem/Clang/TypeSystemClang.cpp      |  8 +++++++
 .../floating-types-specialization/Makefile    |  3 +++
 .../TestCppFloatingTypesSpecialization.py     | 22 +++++++++++++++++++
 .../floating-types-specialization/main.cpp    | 11 ++++++++++
 .../TestCppTemplateArguments.py               |  2 +-
 5 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 lldb/test/API/lang/cpp/floating-types-specialization/Makefile
 create mode 100644 lldb/test/API/lang/cpp/floating-types-specialization/TestCppFloatingTypesSpecialization.py
 create mode 100644 lldb/test/API/lang/cpp/floating-types-specialization/main.cpp

diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index a5aaf1f9cb5af..21c265ede0bc5 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -960,6 +960,12 @@ CompilerType TypeSystemClang::GetBuiltinTypeForDWARFEncodingAndBitSize(
     if (type_name == "long double" &&
         QualTypeMatchesBitSize(bit_size, ast, ast.LongDoubleTy))
       return GetType(ast.LongDoubleTy);
+    if (type_name == "__bf16" &&
+        QualTypeMatchesBitSize(bit_size, ast, ast.BFloat16Ty))
+      return GetType(ast.BFloat16Ty);
+    if (type_name == "_Float16" &&
+        QualTypeMatchesBitSize(bit_size, ast, ast.Float16Ty))
+      return GetType(ast.Float16Ty);
     // As Rust currently uses `TypeSystemClang`, match `f128` here as well so it
     // doesn't get misinterpreted as `long double` on targets where they are
     // the same size but different formats.
@@ -1792,6 +1798,8 @@ bool TypeSystemClang::RecordHasFields(const RecordDecl *record_decl) {
     for (base_class = cxx_record_decl->bases_begin(),
         base_class_end = cxx_record_decl->bases_end();
          base_class != base_class_end; ++base_class) {
+      assert(record_decl != base_class->getType()->getAsCXXRecordDecl() &&
+             "Base can't inherit from itself.");
       if (RecordHasFields(base_class->getType()->getAsCXXRecordDecl()))
         return true;
     }
diff --git a/lldb/test/API/lang/cpp/floating-types-specialization/Makefile b/lldb/test/API/lang/cpp/floating-types-specialization/Makefile
new file mode 100644
index 0000000000000..99998b20bcb05
--- /dev/null
+++ b/lldb/test/API/lang/cpp/floating-types-specialization/Makefile
@@ -0,0 +1,3 @@
+CXX_SOURCES := main.cpp
+
+include Makefile.rules
diff --git a/lldb/test/API/lang/cpp/floating-types-specialization/TestCppFloatingTypesSpecialization.py b/lldb/test/API/lang/cpp/floating-types-specialization/TestCppFloatingTypesSpecialization.py
new file mode 100644
index 0000000000000..9564a0bc31809
--- /dev/null
+++ b/lldb/test/API/lang/cpp/floating-types-specialization/TestCppFloatingTypesSpecialization.py
@@ -0,0 +1,22 @@
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class TestCase(TestBase):
+    def test(self):
+        self.build()
+        lldbutil.run_to_source_breakpoint(
+            self, "// break here", lldb.SBFileSpec("main.cpp", False)
+        )
+
+        self.expect_expr("f0", result_type="Foo<__bf16>")
+        self.expect_expr("f1", result_type="Foo<__fp16>")
+
+        # Test sizeof to ensure while computing layout we don't do
+        # infinite recursion.
+        v = self.frame().EvaluateExpression("sizeof(f0)")
+        self.assertEqual(v.GetValueAsUnsigned() > 0, True)
+        v = self.frame().EvaluateExpression("sizeof(f1)")
+        self.assertEqual(v.GetValueAsUnsigned() > 0, True)
diff --git a/lldb/test/API/lang/cpp/floating-types-specialization/main.cpp b/lldb/test/API/lang/cpp/floating-types-specialization/main.cpp
new file mode 100644
index 0000000000000..e3e8a3767fef8
--- /dev/null
+++ b/lldb/test/API/lang/cpp/floating-types-specialization/main.cpp
@@ -0,0 +1,11 @@
+template <typename T> struct Foo;
+
+template <> struct Foo<__bf16> {};
+
+template <> struct Foo<_Float16> : Foo<__bf16> {};
+
+int main() {
+  Foo<__bf16> f0;
+  Foo<_Float16> f1;
+  return 0; // break here
+}
diff --git a/lldb/test/API/lang/cpp/template-arguments/TestCppTemplateArguments.py b/lldb/test/API/lang/cpp/template-arguments/TestCppTemplateArguments.py
index eac7b5ef1099a..f26d382bf8582 100644
--- a/lldb/test/API/lang/cpp/template-arguments/TestCppTemplateArguments.py
+++ b/lldb/test/API/lang/cpp/template-arguments/TestCppTemplateArguments.py
@@ -82,7 +82,7 @@ def test(self):
         value = self.expect_expr("temp7", result_type="Foo<__fp16, __fp16>")
         self.assertFalse(value.GetType().GetTemplateArgumentValue(target, 1))
 
-        value = self.expect_expr("temp8", result_type="Foo<__fp16, __fp16>")
+        value = self.expect_expr("temp8", result_type="Foo<__bf16, __bf16>")
         self.assertFalse(value.GetType().GetTemplateArgumentValue(target, 1))
 
         value = self.expect_expr("temp9", result_type="Bar<double, 1.200000e+00>")

>From d5a67bbfd12f68706d9e01b7298acceacb2d962a Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Thu, 21 Aug 2025 08:14:16 -0700
Subject: [PATCH 36/48] [MLIR] Apply clang-tidy fixes for
 misc-use-internal-linkage in OpenMPDialect.cpp (NFC)

---
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index f01ad05a778ec..a173cf13328cd 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -182,7 +182,7 @@ static ParseResult parseClauseAttr(AsmParser &parser, ClauseAttr &attr) {
 }
 
 template <typename ClauseAttr>
-void printClauseAttr(OpAsmPrinter &p, Operation *op, ClauseAttr attr) {
+static void printClauseAttr(OpAsmPrinter &p, Operation *op, ClauseAttr attr) {
   p << stringifyEnum(attr.getValue());
 }
 
@@ -1511,8 +1511,8 @@ static LogicalResult verifySynchronizationHint(Operation *op, uint64_t hint) {
 //===----------------------------------------------------------------------===//
 
 // Helper function to get bitwise AND of `value` and 'flag'
-uint64_t mapTypeToBitFlag(uint64_t value,
-                          llvm::omp::OpenMPOffloadMappingFlags flag) {
+static uint64_t mapTypeToBitFlag(uint64_t value,
+                                 llvm::omp::OpenMPOffloadMappingFlags flag) {
   return value & llvm::to_underlying(flag);
 }
 

>From c3fb192408f39a7b8191ca5ea1d17fbb8a530945 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Wed, 1 Oct 2025 14:03:29 +0300
Subject: [PATCH 37/48] [BOLT] Gadget scanner: optionally assume auth traps on
 failure (#139778)

On AArch64 it is possible for an auth instruction to either return an
invalid address value on failure (without FEAT_FPAC) or generate an
error (with FEAT_FPAC). It thus may be possible to never emit explicit
pointer checks, if the target CPU is known to support FEAT_FPAC.

This commit implements an --auth-traps-on-failure command line option,
which essentially makes "safe-to-dereference" and "trusted" register
properties identical and disables scanning for authentication oracles
completely.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp        | 112 +++++++----
 .../binary-analysis/AArch64/cmdline-args.test |   1 +
 .../AArch64/gs-pauth-authentication-oracles.s |   6 +-
 .../binary-analysis/AArch64/gs-pauth-calls.s  |   5 +-
 .../AArch64/gs-pauth-debug-output.s           | 177 ++++++++++-------
 .../AArch64/gs-pauth-signing-oracles.s        |  54 ++---
 .../AArch64/gs-pauth-tail-calls.s             | 184 +++++++++---------
 7 files changed, 314 insertions(+), 225 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index cfe4b6ba785e4..9d22d3c8c6cd7 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -14,6 +14,7 @@
 #include "bolt/Passes/PAuthGadgetScanner.h"
 #include "bolt/Core/ParallelUtilities.h"
 #include "bolt/Passes/DataflowAnalysis.h"
+#include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/MC/MCInst.h"
@@ -26,6 +27,11 @@ namespace llvm {
 namespace bolt {
 namespace PAuthGadgetScanner {
 
+static cl::opt<bool> AuthTrapsOnFailure(
+    "auth-traps-on-failure",
+    cl::desc("Assume authentication instructions always trap on failure"),
+    cl::cat(opts::BinaryAnalysisCategory));
+
 [[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef Label,
                                        const MCInst &MI) {
   dbgs() << "  " << Label << ": ";
@@ -364,6 +370,34 @@ class SrcSafetyAnalysis {
     return Clobbered;
   }
 
+  std::optional<MCPhysReg> getRegMadeTrustedByChecking(const MCInst &Inst,
+                                                       SrcState Cur) const {
+    // This function cannot return multiple registers. This is never the case
+    // on AArch64.
+    std::optional<MCPhysReg> RegCheckedByInst =
+        BC.MIB->getAuthCheckedReg(Inst, /*MayOverwrite=*/false);
+    if (RegCheckedByInst && Cur.SafeToDerefRegs[*RegCheckedByInst])
+      return *RegCheckedByInst;
+
+    auto It = CheckerSequenceInfo.find(&Inst);
+    if (It == CheckerSequenceInfo.end())
+      return std::nullopt;
+
+    MCPhysReg RegCheckedBySequence = It->second.first;
+    const MCInst *FirstCheckerInst = It->second.second;
+
+    // FirstCheckerInst should belong to the same basic block (see the
+    // assertion in DataflowSrcSafetyAnalysis::run()), meaning it was
+    // deterministically processed a few steps before this instruction.
+    const SrcState &StateBeforeChecker = getStateBefore(*FirstCheckerInst);
+
+    // The sequence checks the register, but it should be authenticated before.
+    if (!StateBeforeChecker.SafeToDerefRegs[RegCheckedBySequence])
+      return std::nullopt;
+
+    return RegCheckedBySequence;
+  }
+
   // Returns all registers that can be treated as if they are written by an
   // authentication instruction.
   SmallVector<MCPhysReg> getRegsMadeSafeToDeref(const MCInst &Point,
@@ -386,18 +420,38 @@ class SrcSafetyAnalysis {
         Regs.push_back(DstAndSrc->first);
     }
 
+    // Make sure explicit checker sequence keeps register safe-to-dereference
+    // when the register would be clobbered according to the regular rules:
+    //
+    //    ; LR is safe to dereference here
+    //    mov   x16, x30  ; start of the sequence, LR is s-t-d right before
+    //    xpaclri         ; clobbers LR, LR is not safe anymore
+    //    cmp   x30, x16
+    //    b.eq  1f        ; end of the sequence: LR is marked as trusted
+    //    brk   0x1234
+    //  1:
+    //    ; at this point LR would be marked as trusted,
+    //    ; but not safe-to-dereference
+    //
+    // or even just
+    //
+    //    ; X1 is safe to dereference here
+    //    ldr x0, [x1, #8]!
+    //    ; X1 is trusted here, but it was clobbered due to address write-back
+    if (auto CheckedReg = getRegMadeTrustedByChecking(Point, Cur))
+      Regs.push_back(*CheckedReg);
+
     return Regs;
   }
 
   // Returns all registers made trusted by this instruction.
   SmallVector<MCPhysReg> getRegsMadeTrusted(const MCInst &Point,
                                             const SrcState &Cur) const {
+    assert(!AuthTrapsOnFailure && "Use getRegsMadeSafeToDeref instead");
     SmallVector<MCPhysReg> Regs;
 
     // An authenticated pointer can be checked, or
-    std::optional<MCPhysReg> CheckedReg =
-        BC.MIB->getAuthCheckedReg(Point, /*MayOverwrite=*/false);
-    if (CheckedReg && Cur.SafeToDerefRegs[*CheckedReg])
+    if (auto CheckedReg = getRegMadeTrustedByChecking(Point, Cur))
       Regs.push_back(*CheckedReg);
 
     // ... a pointer can be authenticated by an instruction that always checks
@@ -408,19 +462,6 @@ class SrcSafetyAnalysis {
     if (AutReg && IsChecked)
       Regs.push_back(*AutReg);
 
-    if (CheckerSequenceInfo.contains(&Point)) {
-      MCPhysReg CheckedReg;
-      const MCInst *FirstCheckerInst;
-      std::tie(CheckedReg, FirstCheckerInst) = CheckerSequenceInfo.at(&Point);
-
-      // FirstCheckerInst should belong to the same basic block (see the
-      // assertion in DataflowSrcSafetyAnalysis::run()), meaning it was
-      // deterministically processed a few steps before this instruction.
-      const SrcState &StateBeforeChecker = getStateBefore(*FirstCheckerInst);
-      if (StateBeforeChecker.SafeToDerefRegs[CheckedReg])
-        Regs.push_back(CheckedReg);
-    }
-
     // ... a safe address can be materialized, or
     if (auto NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Point))
       Regs.push_back(*NewAddrReg);
@@ -463,28 +504,11 @@ class SrcSafetyAnalysis {
     BitVector Clobbered = getClobberedRegs(Point);
     SmallVector<MCPhysReg> NewSafeToDerefRegs =
         getRegsMadeSafeToDeref(Point, Cur);
-    SmallVector<MCPhysReg> NewTrustedRegs = getRegsMadeTrusted(Point, Cur);
-
-    // Ideally, being trusted is a strictly stronger property than being
-    // safe-to-dereference. To simplify the computation of Next state, enforce
-    // this for NewSafeToDerefRegs and NewTrustedRegs. Additionally, this
-    // fixes the properly for "cumulative" register states in tricky cases
-    // like the following:
-    //
-    //    ; LR is safe to dereference here
-    //    mov   x16, x30  ; start of the sequence, LR is s-t-d right before
-    //    xpaclri         ; clobbers LR, LR is not safe anymore
-    //    cmp   x30, x16
-    //    b.eq  1f        ; end of the sequence: LR is marked as trusted
-    //    brk   0x1234
-    //  1:
-    //    ; at this point LR would be marked as trusted,
-    //    ; but not safe-to-dereference
-    //
-    for (auto TrustedReg : NewTrustedRegs) {
-      if (!is_contained(NewSafeToDerefRegs, TrustedReg))
-        NewSafeToDerefRegs.push_back(TrustedReg);
-    }
+    // If authentication instructions trap on failure, safe-to-dereference
+    // registers are always trusted.
+    SmallVector<MCPhysReg> NewTrustedRegs =
+        AuthTrapsOnFailure ? NewSafeToDerefRegs
+                           : getRegsMadeTrusted(Point, Cur);
 
     // Then, compute the state after this instruction is executed.
     SrcState Next = Cur;
@@ -521,6 +545,11 @@ class SrcSafetyAnalysis {
       dbgs() << ")\n";
     });
 
+    // Being trusted is a strictly stronger property than being
+    // safe-to-dereference.
+    assert(!Next.TrustedRegs.test(Next.SafeToDerefRegs) &&
+           "SafeToDerefRegs should contain all TrustedRegs");
+
     return Next;
   }
 
@@ -1130,6 +1159,11 @@ class DataflowDstSafetyAnalysis
   }
 
   void run() override {
+    // As long as DstSafetyAnalysis is only computed to detect authentication
+    // oracles, it is a waste of time to compute it when authentication
+    // instructions are known to always trap on failure.
+    assert(!AuthTrapsOnFailure &&
+           "DstSafetyAnalysis is useless with faulting auth");
     for (BinaryBasicBlock &BB : Func) {
       if (auto CheckerInfo = BC.MIB->getAuthCheckedReg(BB)) {
         LLVM_DEBUG({
@@ -1571,6 +1605,8 @@ void FunctionAnalysisContext::findUnsafeDefs(
     SmallVector<PartialReport<MCPhysReg>> &Reports) {
   if (PacRetGadgetsOnly)
     return;
+  if (AuthTrapsOnFailure)
+    return;
 
   auto Analysis = DstSafetyAnalysis::create(BF, AllocatorId, {});
   LLVM_DEBUG({ dbgs() << "Running dst register safety analysis...\n"; });
diff --git a/bolt/test/binary-analysis/AArch64/cmdline-args.test b/bolt/test/binary-analysis/AArch64/cmdline-args.test
index 3e70b2c0d3bb9..9660ad3bf80f7 100644
--- a/bolt/test/binary-analysis/AArch64/cmdline-args.test
+++ b/bolt/test/binary-analysis/AArch64/cmdline-args.test
@@ -33,6 +33,7 @@ HELP-NEXT:  OPTIONS:
 HELP-EMPTY:
 HELP-NEXT:  BinaryAnalysis options:
 HELP-EMPTY:
+HELP-NEXT:   --auth-traps-on-failure - Assume authentication instructions always trap on failure
 HELP-NEXT:   --scanners=<value> - which gadget scanners to run
 HELP-NEXT:   =pacret - pac-ret: return address protection (subset of "pauth")
 HELP-NEXT:   =pauth - All Pointer Authentication scanners
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s
index f44ba21b9d484..9f580b66f47c7 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s
@@ -1,6 +1,7 @@
 // RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
-// RUN: llvm-bolt-binary-analysis --scanners=pacret %t.exe 2>&1 | FileCheck -check-prefix=PACRET %s
-// RUN: llvm-bolt-binary-analysis --scanners=pauth  %t.exe 2>&1 | FileCheck %s
+// RUN: llvm-bolt-binary-analysis --scanners=pacret                        %t.exe 2>&1 | FileCheck -check-prefix=PACRET %s
+// RUN: llvm-bolt-binary-analysis --scanners=pauth --auth-traps-on-failure %t.exe 2>&1 | FileCheck -check-prefix=FPAC %s
+// RUN: llvm-bolt-binary-analysis --scanners=pauth                         %t.exe 2>&1 | FileCheck %s
 
 // The detection of compiler-generated explicit pointer checks is tested in
 // gs-pauth-address-checks.s, for that reason only test here "dummy-load" and
@@ -8,6 +9,7 @@
 // detected per-instruction and per-BB.
 
 // PACRET-NOT: authentication oracle found in function
+// FPAC-NOT:   authentication oracle found in function
 
         .text
 
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s b/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s
index fb0bc7cff2377..5e88e105a33f0 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s
@@ -1,6 +1,7 @@
 // RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
-// RUN: llvm-bolt-binary-analysis --scanners=pacret %t.exe 2>&1 | FileCheck -check-prefix=PACRET %s
-// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+// RUN: llvm-bolt-binary-analysis --scanners=pacret                        %t.exe 2>&1 | FileCheck -check-prefix=PACRET %s
+// RUN: llvm-bolt-binary-analysis --scanners=pauth --auth-traps-on-failure %t.exe 2>&1 | FileCheck %s
+// RUN: llvm-bolt-binary-analysis --scanners=pauth                         %t.exe 2>&1 | FileCheck %s
 
 // PACRET-NOT: non-protected call found in function
 
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
index b1cec7f92ad05..ee8521ff1f810 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
@@ -1,10 +1,14 @@
 // REQUIRES: asserts
 //
 // RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
-// RUN: llvm-bolt-binary-analysis --scanners=pacret -no-threads \
-// RUN:    -debug-only bolt-pauth-scanner %t.exe 2>&1 | FileCheck %s
-// RUN: llvm-bolt-binary-analysis --scanners=pauth -no-threads \
-// RUN:    -debug-only bolt-pauth-scanner %t.exe 2>&1 | FileCheck -check-prefixes=CHECK,PAUTH %s
+// RUN: llvm-bolt-binary-analysis --scanners=pacret --no-threads \
+// RUN:    -debug-only bolt-pauth-scanner %t.exe 2>&1 | FileCheck -check-prefixes=CHECK,NOFPAC %s
+// RUN: llvm-bolt-binary-analysis --scanners=pacret --no-threads --auth-traps-on-failure \
+// RUN:    -debug-only bolt-pauth-scanner %t.exe 2>&1 | FileCheck -check-prefixes=CHECK,FPAC %s
+// RUN: llvm-bolt-binary-analysis --scanners=pauth  --no-threads \
+// RUN:    -debug-only bolt-pauth-scanner %t.exe 2>&1 | FileCheck -check-prefixes=CHECK,NOFPAC,AUTH-ORACLES,PAUTH %s
+// RUN: llvm-bolt-binary-analysis --scanners=pauth  --no-threads --auth-traps-on-failure \
+// RUN:    -debug-only bolt-pauth-scanner %t.exe 2>&1 | FileCheck -check-prefixes=CHECK,FPAC,PAUTH %s
 
 // Check the debug output generated by PAuth gadget scanner to make sure the
 // that output is kept meaningful and to provide an overview of what happens
@@ -61,30 +65,54 @@ simple:
 // CHECK-NEXT:     State 1: src-state<empty>
 // CHECK-NEXT:     State 2: src-state<SafeToDerefRegs: , TrustedRegs:  , Insts: >)
 // CHECK-NEXT:     merged state: src-state<SafeToDerefRegs: , TrustedRegs:  , Insts: >
-// CHECK-NEXT:   SrcSafetyAnalysis::ComputeNext(   autiza  x0, src-state<SafeToDerefRegs: , TrustedRegs:  , Insts: >)
-// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: W0 X0 W0_HI , TrustedRegs: , Insts: >)
-// CHECK-NEXT:   SrcSafetyAnalysis::ComputeNext(   blr     x0, src-state<SafeToDerefRegs: W0 X0 W0_HI , TrustedRegs: , Insts: >)
-// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
-// CHECK-NEXT:   SrcSafetyAnalysis::ComputeNext(   ldp     x29, x30, [sp], #0x10, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
-// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
-// CHECK-NEXT:   SrcSafetyAnalysis::ComputeNext(   hint    #29, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
-// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: , Insts: >)
-// CHECK-NEXT:   SrcSafetyAnalysis::ComputeNext(   ret     x30, src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: , Insts: >)
-// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: , Insts: >)
-// CHECK-NEXT:   DataflowSrcSafetyAnalysis::Confluence(
-// CHECK-NEXT:     State 1: src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >
-// CHECK-NEXT:     State 2: src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
-// CHECK-NEXT:     merged state: src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >
-// CHECK-NEXT:   SrcSafetyAnalysis::ComputeNext(   autiza  x0, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
-// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: W0 X0 W0_HI , TrustedRegs: , Insts: >)
-// CHECK-NEXT:   SrcSafetyAnalysis::ComputeNext(   blr     x0, src-state<SafeToDerefRegs: W0 X0 W0_HI , TrustedRegs: , Insts: >)
-// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
-// CHECK-NEXT:   SrcSafetyAnalysis::ComputeNext(   ldp     x29, x30, [sp], #0x10, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
-// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
-// CHECK-NEXT:   SrcSafetyAnalysis::ComputeNext(   hint    #29, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
-// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: , Insts: >)
-// CHECK-NEXT:   SrcSafetyAnalysis::ComputeNext(   ret     x30, src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: , Insts: >)
-// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   autiza  x0, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: W0 X0 W0_HI , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   blr     x0, src-state<SafeToDerefRegs: W0 X0 W0_HI , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   ldp     x29, x30, [sp], #0x10, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   hint    #29, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   ret     x30, src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:   DataflowSrcSafetyAnalysis::Confluence(
+// NOFPAC-NEXT:     State 1: src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >
+// NOFPAC-NEXT:     State 2: src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:     merged state: src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >
+// NOFPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   autiza  x0, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: W0 X0 W0_HI , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   blr     x0, src-state<SafeToDerefRegs: W0 X0 W0_HI , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   ldp     x29, x30, [sp], #0x10, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   hint    #29, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   ret     x30, src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: , Insts: >)
+// NOFPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: , Insts: >)
+// FPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   autiza  x0, src-state<SafeToDerefRegs: , TrustedRegs:  , Insts: >)
+// FPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: W0 X0 W0_HI , TrustedRegs: W0 X0 W0_HI , Insts: >)
+// FPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   blr     x0, src-state<SafeToDerefRegs: W0 X0 W0_HI , TrustedRegs: W0 X0 W0_HI , Insts: >)
+// FPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// FPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   ldp     x29, x30, [sp], #0x10, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// FPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// FPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   hint    #29, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// FPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: LR W30 W30_HI , Insts: >)
+// FPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   ret     x30, src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: LR W30 W30_HI , Insts: >)
+// FPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: LR W30 W30_HI , Insts: >)
+// FPAC-NEXT:   DataflowSrcSafetyAnalysis::Confluence(
+// FPAC-NEXT:     State 1: src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >
+// FPAC-NEXT:     State 2: src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// FPAC-NEXT:     merged state: src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >
+// FPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   autiza  x0, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// FPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: W0 X0 W0_HI , TrustedRegs: W0 X0 W0_HI , Insts: >)
+// FPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   blr     x0, src-state<SafeToDerefRegs: W0 X0 W0_HI , TrustedRegs: W0 X0 W0_HI , Insts: >)
+// FPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// FPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   ldp     x29, x30, [sp], #0x10, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// FPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// FPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   hint    #29, src-state<SafeToDerefRegs: , TrustedRegs: , Insts: >)
+// FPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: LR W30 W30_HI , Insts: >)
+// FPAC-NEXT:   SrcSafetyAnalysis::ComputeNext(   ret     x30, src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: LR W30 W30_HI , Insts: >)
+// FPAC-NEXT:     .. result: (src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: LR W30 W30_HI , Insts: >)
 // CHECK-NEXT: After src register safety analysis:
 // CHECK-NEXT: Binary Function "simple"  {
 // CHECK-NEXT:   Number      : 1
@@ -255,53 +283,56 @@ auth_oracle:
 // ...
 // CHECK:      End of Function "auth_oracle"
 // ...
-// PAUTH:      Running dst register safety analysis...
-// PAUTH-NEXT:   DstSafetyAnalysis::ComputeNext(       ret     x30, dst-state<CannotEscapeUnchecked: , Insts: >)
-// PAUTH-NEXT:     .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: >)
-// PAUTH-NEXT:   DstSafetyAnalysis::ComputeNext(       autia   x0, x1, dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: >)
-// PAUTH-NEXT:     .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: >)
-// PAUTH-NEXT: After dst register safety analysis:
-// PAUTH-NEXT: Binary Function "auth_oracle"  {
-// PAUTH-NEXT:   Number      : 4
-// PAUTH-NEXT:   State       : CFG constructed
+// FPAC-NOT: Running dst register safety analysis
+// FPAC-NOT: DstSafetyAnalysis::ComputeNext
+// FPAC-NOT: {{.*dst-state.*}}
+// AUTH-ORACLES:      Running dst register safety analysis...
+// AUTH-ORACLES-NEXT:   DstSafetyAnalysis::ComputeNext(       ret     x30, dst-state<CannotEscapeUnchecked: , Insts: >)
+// AUTH-ORACLES-NEXT:     .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: >)
+// AUTH-ORACLES-NEXT:   DstSafetyAnalysis::ComputeNext(       autia   x0, x1, dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: >)
+// AUTH-ORACLES-NEXT:     .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: >)
+// AUTH-ORACLES-NEXT: After dst register safety analysis:
+// AUTH-ORACLES-NEXT: Binary Function "auth_oracle"  {
+// AUTH-ORACLES-NEXT:   Number      : 4
+// AUTH-ORACLES-NEXT:   State       : CFG constructed
 // ...
-// PAUTH:        BB Layout   : [[BB0]]
-// PAUTH-NEXT: }
-// PAUTH-NEXT: [[BB0]] (2 instructions, align : 1)
-// PAUTH-NEXT:   Entry Point
-// PAUTH-NEXT:     00000000:   autia   x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: >
-// PAUTH-NEXT:     00000004:   ret # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: >
-// PAUTH-EMPTY:
-// PAUTH-NEXT: DWARF CFI Instructions:
-// PAUTH-NEXT:     <empty>
-// PAUTH-NEXT: End of Function "auth_oracle"
-// PAUTH-EMPTY:
-// PAUTH-NEXT:   Found auth inst:     00000000:        autia   x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: >
-// PAUTH-NEXT:     Authenticated reg: X0
-// PAUTH-NEXT:     safe output registers: LR W30 W30_HI{{[ \t]*$}}
-// PAUTH-EMPTY:
-// PAUTH-NEXT: Running detailed dst register safety analysis...
-// PAUTH-NEXT:   DstSafetyAnalysis::ComputeNext(       ret     x30, dst-state<CannotEscapeUnchecked: , Insts: [0]()>)
-// PAUTH-NEXT:     .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: [0]()>)
-// PAUTH-NEXT:   DstSafetyAnalysis::ComputeNext(       autia   x0, x1, dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: [0]()>)
-// PAUTH-NEXT:     .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: [0](0x{{[0-9a-f]+}} )>)
-// PAUTH-NEXT: After detailed dst register safety analysis:
-// PAUTH-NEXT: Binary Function "auth_oracle"  {
-// PAUTH-NEXT:   Number      : 4
-// PAUTH-NEXT:   State       : CFG constructed
+// AUTH-ORACLES:        BB Layout   : [[BB0]]
+// AUTH-ORACLES-NEXT: }
+// AUTH-ORACLES-NEXT: [[BB0]] (2 instructions, align : 1)
+// AUTH-ORACLES-NEXT:   Entry Point
+// AUTH-ORACLES-NEXT:     00000000:   autia   x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: >
+// AUTH-ORACLES-NEXT:     00000004:   ret # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: >
+// AUTH-ORACLES-EMPTY:
+// AUTH-ORACLES-NEXT: DWARF CFI Instructions:
+// AUTH-ORACLES-NEXT:     <empty>
+// AUTH-ORACLES-NEXT: End of Function "auth_oracle"
+// AUTH-ORACLES-EMPTY:
+// AUTH-ORACLES-NEXT:   Found auth inst:     00000000:        autia   x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: >
+// AUTH-ORACLES-NEXT:     Authenticated reg: X0
+// AUTH-ORACLES-NEXT:     safe output registers: LR W30 W30_HI{{[ \t]*$}}
+// AUTH-ORACLES-EMPTY:
+// AUTH-ORACLES-NEXT: Running detailed dst register safety analysis...
+// AUTH-ORACLES-NEXT:   DstSafetyAnalysis::ComputeNext(       ret     x30, dst-state<CannotEscapeUnchecked: , Insts: [0]()>)
+// AUTH-ORACLES-NEXT:     .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: [0]()>)
+// AUTH-ORACLES-NEXT:   DstSafetyAnalysis::ComputeNext(       autia   x0, x1, dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: [0]()>)
+// AUTH-ORACLES-NEXT:     .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: [0](0x{{[0-9a-f]+}} )>)
+// AUTH-ORACLES-NEXT: After detailed dst register safety analysis:
+// AUTH-ORACLES-NEXT: Binary Function "auth_oracle"  {
+// AUTH-ORACLES-NEXT:   Number      : 4
+// AUTH-ORACLES-NEXT:   State       : CFG constructed
 // ...
-// PAUTH:        BB Layout   : [[BB0]]
-// PAUTH-NEXT: }
-// PAUTH-NEXT: [[BB0]] (2 instructions, align : 1)
-// PAUTH-NEXT:   Entry Point
-// PAUTH-NEXT:     00000000:   autia   x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0](0x{{[0-9a-f]+}} )>
-// PAUTH-NEXT:     00000004:   ret # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0]()>
-// PAUTH-EMPTY:
-// PAUTH-NEXT: DWARF CFI Instructions:
-// PAUTH-NEXT:     <empty>
-// PAUTH-NEXT: End of Function "auth_oracle"
-// PAUTH-EMPTY:
-// PAUTH-NEXT:   Attaching leakage info to:     00000000:      autia   x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0](0x{{[0-9a-f]+}} )>
+// AUTH-ORACLES:        BB Layout   : [[BB0]]
+// AUTH-ORACLES-NEXT: }
+// AUTH-ORACLES-NEXT: [[BB0]] (2 instructions, align : 1)
+// AUTH-ORACLES-NEXT:   Entry Point
+// AUTH-ORACLES-NEXT:     00000000:   autia   x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0](0x{{[0-9a-f]+}} )>
+// AUTH-ORACLES-NEXT:     00000004:   ret # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0]()>
+// AUTH-ORACLES-EMPTY:
+// AUTH-ORACLES-NEXT: DWARF CFI Instructions:
+// AUTH-ORACLES-NEXT:     <empty>
+// AUTH-ORACLES-NEXT: End of Function "auth_oracle"
+// AUTH-ORACLES-EMPTY:
+// AUTH-ORACLES-NEXT:   Attaching leakage info to:     00000000:      autia   x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0](0x{{[0-9a-f]+}} )>
 
 // Gadget scanner should not crash on CFI instructions, including when debug-printing them.
 // Note that the particular debug output is not checked, but BOLT should be
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
index 4d4bb7b0fb251..7d908f234d852 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
@@ -1,6 +1,7 @@
 // RUN: %clang %cflags -march=armv8.3-a+pauth-lr -Wl,--no-relax %s -o %t.exe
-// RUN: llvm-bolt-binary-analysis --scanners=pacret %t.exe 2>&1 | FileCheck -check-prefix=PACRET %s
-// RUN: llvm-bolt-binary-analysis --scanners=pauth  %t.exe 2>&1 | FileCheck %s
+// RUN: llvm-bolt-binary-analysis --scanners=pacret                        %t.exe 2>&1 | FileCheck -check-prefix=PACRET %s
+// RUN: llvm-bolt-binary-analysis --scanners=pauth                         %t.exe 2>&1 | FileCheck -check-prefixes=CHECK,NOFPAC %s
+// RUN: llvm-bolt-binary-analysis --scanners=pauth --auth-traps-on-failure %t.exe 2>&1 | FileCheck -check-prefixes=CHECK,FPAC %s
 
 // The detection of compiler-generated explicit pointer checks is tested in
 // gs-pauth-address-checks.s, for that reason only test here "dummy-load" and
@@ -66,9 +67,10 @@ good_sign_auted_checked_brk:
         .globl  bad_sign_authed_unchecked
         .type   bad_sign_authed_unchecked,@function
 bad_sign_authed_unchecked:
-// CHECK-LABEL: GS-PAUTH: signing oracle found in function bad_sign_authed_unchecked, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// FPAC-NOT: bad_sign_authed_unchecked
+// NOFPAC-LABEL: GS-PAUTH: signing oracle found in function bad_sign_authed_unchecked, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
         autda   x0, x2
         pacda   x0, x1
         ret
@@ -266,9 +268,10 @@ bad_call_between_checked_and_used:
         .globl  bad_transition_check_then_auth
         .type   bad_transition_check_then_auth,@function
 bad_transition_check_then_auth:
-// CHECK-LABEL: GS-PAUTH: signing oracle found in function bad_transition_check_then_auth, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// FPAC-NOT: bad_transition_check_then_auth
+// NOFPAC-LABEL: GS-PAUTH: signing oracle found in function bad_transition_check_then_auth, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
         ldr     x2, [x0]
         autda   x0, x2
         pacda   x0, x1
@@ -278,9 +281,10 @@ bad_transition_check_then_auth:
         .globl  bad_transition_auth_then_auth
         .type   bad_transition_auth_then_auth,@function
 bad_transition_auth_then_auth:
-// CHECK-LABEL: GS-PAUTH: signing oracle found in function bad_transition_auth_then_auth, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// FPAC-NOT: bad_transition_auth_then_auth
+// NOFPAC-LABEL: GS-PAUTH: signing oracle found in function bad_transition_auth_then_auth, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
         autda   x0, x2
         autda   x0, x2
         pacda   x0, x1
@@ -363,9 +367,10 @@ good_sign_auted_checked_brk_multi_bb:
         .globl  bad_sign_authed_unchecked_multi_bb
         .type   bad_sign_authed_unchecked_multi_bb,@function
 bad_sign_authed_unchecked_multi_bb:
-// CHECK-LABEL: GS-PAUTH: signing oracle found in function bad_sign_authed_unchecked_multi_bb, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// FPAC-NOT: bad_sign_authed_unchecked_multi_bb
+// NOFPAC-LABEL: GS-PAUTH: signing oracle found in function bad_sign_authed_unchecked_multi_bb, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
         autda   x0, x2
         cbz     x3, 1f
         ldr     x2, [x0]
@@ -534,9 +539,10 @@ good_sign_auted_checked_ldr_nocfg:
         .globl  bad_sign_authed_unchecked_nocfg
         .type   bad_sign_authed_unchecked_nocfg,@function
 bad_sign_authed_unchecked_nocfg:
-// CHECK-LABEL: GS-PAUTH: signing oracle found in function bad_sign_authed_unchecked_nocfg, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// FPAC-NOT: bad_sign_authed_unchecked_nocfg
+// NOFPAC-LABEL: GS-PAUTH: signing oracle found in function bad_sign_authed_unchecked_nocfg, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
         adr     x3, 1f
         br      x3
 1:
@@ -640,9 +646,10 @@ bad_clobber_between_checked_and_used_nocfg:
         .globl  bad_transition_check_then_auth_nocfg
         .type   bad_transition_check_then_auth_nocfg,@function
 bad_transition_check_then_auth_nocfg:
-// CHECK-LABEL: GS-PAUTH: signing oracle found in function bad_transition_check_then_auth_nocfg, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// FPAC-NOT: bad_transition_check_then_auth_nocfg
+// NOFPAC-LABEL: GS-PAUTH: signing oracle found in function bad_transition_check_then_auth_nocfg, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
         adr     x3, 1f
         br      x3
 1:
@@ -655,9 +662,10 @@ bad_transition_check_then_auth_nocfg:
         .globl  bad_transition_auth_then_auth_nocfg
         .type   bad_transition_auth_then_auth_nocfg,@function
 bad_transition_auth_then_auth_nocfg:
-// CHECK-LABEL: GS-PAUTH: signing oracle found in function bad_transition_auth_then_auth_nocfg, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// FPAC-NOT: bad_transition_auth_then_auth_nocfg
+// NOFPAC-LABEL: GS-PAUTH: signing oracle found in function bad_transition_auth_then_auth_nocfg, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:     pacda   x0, x1
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
         adr     x3, 1f
         br      x3
 1:
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-tail-calls.s b/bolt/test/binary-analysis/AArch64/gs-pauth-tail-calls.s
index 2d3c2f1a632ca..59b7d929275a9 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-tail-calls.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-tail-calls.s
@@ -1,6 +1,7 @@
 // RUN: %clang %cflags -Wl,--entry=_custom_start -march=armv8.3-a %s -o %t.exe
-// RUN: llvm-bolt-binary-analysis --scanners=pacret %t.exe 2>&1 | FileCheck -check-prefix=PACRET %s
-// RUN: llvm-bolt-binary-analysis --scanners=pauth  %t.exe 2>&1 | FileCheck %s
+// RUN: llvm-bolt-binary-analysis --scanners=pacret                        %t.exe 2>&1 | FileCheck -check-prefix=PACRET %s
+// RUN: llvm-bolt-binary-analysis --scanners=pauth --auth-traps-on-failure %t.exe 2>&1 | FileCheck -check-prefixes=CHECK,FPAC %s
+// RUN: llvm-bolt-binary-analysis --scanners=pauth                         %t.exe 2>&1 | FileCheck -check-prefixes=CHECK,NOFPAC %s
 
 // PACRET-NOT: untrusted link register found before tail call
 
@@ -89,19 +90,20 @@ bad_indirect_tailcall_not_auted:
         .globl  bad_direct_tailcall_untrusted
         .type   bad_direct_tailcall_untrusted,@function
 bad_direct_tailcall_untrusted:
-// CHECK-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_direct_tailcall_untrusted, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      b       callee # TAILCALL
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
-// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_direct_tailcall_untrusted, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
-// CHECK-NEXT:  The 1 instructions that leak the affected registers are:
-// CHECK-NEXT:  1.     {{[0-9a-f]+}}:      b       callee # TAILCALL
-// CHECK-NEXT:  This happens in the following basic block:
-// CHECK-NEXT:  {{[0-9a-f]+}}:   paciasp
-// CHECK-NEXT:  {{[0-9a-f]+}}:   stp     x29, x30, [sp, #-0x10]!
-// CHECK-NEXT:  {{[0-9a-f]+}}:   ldp     x29, x30, [sp], #0x10
-// CHECK-NEXT:  {{[0-9a-f]+}}:   autiasp
-// CHECK-NEXT:  {{[0-9a-f]+}}:   b       callee # TAILCALL
+// FPAC-NOT: bad_direct_tailcall_untrusted
+// NOFPAC-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_direct_tailcall_untrusted, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      b       callee # TAILCALL
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// NOFPAC-LABEL: GS-PAUTH: authentication oracle found in function bad_direct_tailcall_untrusted, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
+// NOFPAC-NEXT:  The 1 instructions that leak the affected registers are:
+// NOFPAC-NEXT:  1.     {{[0-9a-f]+}}:      b       callee # TAILCALL
+// NOFPAC-NEXT:  This happens in the following basic block:
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   paciasp
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   stp     x29, x30, [sp, #-0x10]!
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   ldp     x29, x30, [sp], #0x10
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   autiasp
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   b       callee # TAILCALL
         paciasp
         stp     x29, x30, [sp, #-0x10]!
         ldp     x29, x30, [sp], #0x10
@@ -114,19 +116,20 @@ bad_direct_tailcall_untrusted:
 bad_plt_tailcall_untrusted:
 // FIXME: Calls via PLT are disassembled incorrectly. Nevertheless, they are
 //        still detected as tail calls.
-// CHECK-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_plt_tailcall_untrusted, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      b       bad_indirect_tailcall_untrusted # TAILCALL
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
-// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_plt_tailcall_untrusted, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
-// CHECK-NEXT:  The 1 instructions that leak the affected registers are:
-// CHECK-NEXT:  1.     {{[0-9a-f]+}}:      b       bad_indirect_tailcall_untrusted # TAILCALL
-// CHECK-NEXT:  This happens in the following basic block:
-// CHECK-NEXT:  {{[0-9a-f]+}}:   paciasp
-// CHECK-NEXT:  {{[0-9a-f]+}}:   stp     x29, x30, [sp, #-0x10]!
-// CHECK-NEXT:  {{[0-9a-f]+}}:   ldp     x29, x30, [sp], #0x10
-// CHECK-NEXT:  {{[0-9a-f]+}}:   autiasp
-// CHECK-NEXT:  {{[0-9a-f]+}}:   b       bad_indirect_tailcall_untrusted # TAILCALL
+// FPAC-NOT: bad_plt_tailcall_untrusted
+// NOFPAC-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_plt_tailcall_untrusted, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      b       bad_indirect_tailcall_untrusted # TAILCALL
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// NOFPAC-LABEL: GS-PAUTH: authentication oracle found in function bad_plt_tailcall_untrusted, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
+// NOFPAC-NEXT:  The 1 instructions that leak the affected registers are:
+// NOFPAC-NEXT:  1.     {{[0-9a-f]+}}:      b       bad_indirect_tailcall_untrusted # TAILCALL
+// NOFPAC-NEXT:  This happens in the following basic block:
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   paciasp
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   stp     x29, x30, [sp, #-0x10]!
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   ldp     x29, x30, [sp], #0x10
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   autiasp
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   b       bad_indirect_tailcall_untrusted # TAILCALL
         paciasp
         stp     x29, x30, [sp, #-0x10]!
         ldp     x29, x30, [sp], #0x10
@@ -137,20 +140,21 @@ bad_plt_tailcall_untrusted:
         .globl  bad_indirect_tailcall_untrusted
         .type   bad_indirect_tailcall_untrusted, at function
 bad_indirect_tailcall_untrusted:
-// CHECK-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_indirect_tailcall_untrusted, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      br      x0 # TAILCALL
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
-// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_indirect_tailcall_untrusted, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
-// CHECK-NEXT:  The 1 instructions that leak the affected registers are:
-// CHECK-NEXT:  1.     {{[0-9a-f]+}}:      br      x0 # TAILCALL
-// CHECK-NEXT:  This happens in the following basic block:
-// CHECK-NEXT:  {{[0-9a-f]+}}:   paciasp
-// CHECK-NEXT:  {{[0-9a-f]+}}:   stp     x29, x30, [sp, #-0x10]!
-// CHECK-NEXT:  {{[0-9a-f]+}}:   ldp     x29, x30, [sp], #0x10
-// CHECK-NEXT:  {{[0-9a-f]+}}:   autiasp
-// CHECK-NEXT:  {{[0-9a-f]+}}:   autia   x0, x1
-// CHECK-NEXT:  {{[0-9a-f]+}}:   br      x0 # TAILCALL
+// FPAC-NOT: bad_indirect_tailcall_untrusted
+// NOFPAC-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_indirect_tailcall_untrusted, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      br      x0 # TAILCALL
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// NOFPAC-LABEL: GS-PAUTH: authentication oracle found in function bad_indirect_tailcall_untrusted, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
+// NOFPAC-NEXT:  The 1 instructions that leak the affected registers are:
+// NOFPAC-NEXT:  1.     {{[0-9a-f]+}}:      br      x0 # TAILCALL
+// NOFPAC-NEXT:  This happens in the following basic block:
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   paciasp
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   stp     x29, x30, [sp, #-0x10]!
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   ldp     x29, x30, [sp], #0x10
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   autiasp
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   autia   x0, x1
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   br      x0 # TAILCALL
         paciasp
         stp     x29, x30, [sp, #-0x10]!
         ldp     x29, x30, [sp], #0x10
@@ -251,13 +255,14 @@ bad_indirect_tailcall_not_auted_multi_bb:
         .globl  bad_direct_tailcall_untrusted_multi_bb
         .type   bad_direct_tailcall_untrusted_multi_bb, at function
 bad_direct_tailcall_untrusted_multi_bb:
-// CHECK-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_direct_tailcall_untrusted_multi_bb, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      b       callee # TAILCALL
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
-// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_direct_tailcall_untrusted_multi_bb, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
-// CHECK-NEXT:  The 1 instructions that leak the affected registers are:
-// CHECK-NEXT:  1.     {{[0-9a-f]+}}:      b       callee # TAILCALL
+// FPAC-NOT: bad_direct_tailcall_untrusted_multi_bb
+// NOFPAC-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_direct_tailcall_untrusted_multi_bb, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      b       callee # TAILCALL
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// NOFPAC-LABEL: GS-PAUTH: authentication oracle found in function bad_direct_tailcall_untrusted_multi_bb, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
+// NOFPAC-NEXT:  The 1 instructions that leak the affected registers are:
+// NOFPAC-NEXT:  1.     {{[0-9a-f]+}}:      b       callee # TAILCALL
         paciasp
         stp     x29, x30, [sp, #-0x10]!
         ldp     x29, x30, [sp], #0x10
@@ -271,12 +276,13 @@ bad_direct_tailcall_untrusted_multi_bb:
         .globl  bad_indirect_tailcall_untrusted_multi_bb
         .type   bad_indirect_tailcall_untrusted_multi_bb, at function
 bad_indirect_tailcall_untrusted_multi_bb:
-// CHECK-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_indirect_tailcall_untrusted_multi_bb, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      br      x0 # UNKNOWN CONTROL FLOW
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
-// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_indirect_tailcall_untrusted_multi_bb, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
-// CHECK-NEXT:  The 0 instructions that leak the affected registers are:
+// FPAC-NOT: bad_indirect_tailcall_untrusted_multi_bb
+// NOFPAC-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_indirect_tailcall_untrusted_multi_bb, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      br      x0 # UNKNOWN CONTROL FLOW
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// NOFPAC-LABEL: GS-PAUTH: authentication oracle found in function bad_indirect_tailcall_untrusted_multi_bb, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
+// NOFPAC-NEXT:  The 0 instructions that leak the affected registers are:
         paciasp
         stp     x29, x30, [sp, #-0x10]!
         ldp     x29, x30, [sp], #0x10
@@ -397,13 +403,14 @@ bad_indirect_tailcall_not_auted_nocfg:
         .globl  bad_direct_tailcall_untrusted_nocfg
         .type   bad_direct_tailcall_untrusted_nocfg, at function
 bad_direct_tailcall_untrusted_nocfg:
-// CHECK-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_direct_tailcall_untrusted_nocfg, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      b       callee # TAILCALL
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
-// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_direct_tailcall_untrusted_nocfg, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
-// CHECK-NEXT:  The 1 instructions that leak the affected registers are:
-// CHECK-NEXT:  1.     {{[0-9a-f]+}}:      b       callee # TAILCALL
+// FPAC-NOT: bad_direct_tailcall_untrusted_nocfg
+// NOFPAC-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_direct_tailcall_untrusted_nocfg, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      b       callee # TAILCALL
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// NOFPAC-LABEL: GS-PAUTH: authentication oracle found in function bad_direct_tailcall_untrusted_nocfg, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
+// NOFPAC-NEXT:  The 1 instructions that leak the affected registers are:
+// NOFPAC-NEXT:  1.     {{[0-9a-f]+}}:      b       callee # TAILCALL
         paciasp
         stp     x29, x30, [sp, #-0x10]!
         adr     x3, 1f
@@ -419,13 +426,14 @@ bad_direct_tailcall_untrusted_nocfg:
 bad_plt_tailcall_untrusted_nocfg:
 // FIXME: Calls via PLT are disassembled incorrectly. Nevertheless, they are
 //        still detected as tail calls.
-// CHECK-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_plt_tailcall_untrusted_nocfg, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      b       bad_indirect_tailcall_untrusted_nocfg # TAILCALL
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
-// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_plt_tailcall_untrusted_nocfg, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
-// CHECK-NEXT:  The 1 instructions that leak the affected registers are:
-// CHECK-NEXT:  1.     {{[0-9a-f]+}}:      b       bad_indirect_tailcall_untrusted_nocfg # TAILCALL
+// FPAC-NOT: bad_plt_tailcall_untrusted_nocfg
+// NOFPAC-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_plt_tailcall_untrusted_nocfg, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      b       bad_indirect_tailcall_untrusted_nocfg # TAILCALL
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// NOFPAC-LABEL: GS-PAUTH: authentication oracle found in function bad_plt_tailcall_untrusted_nocfg, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
+// NOFPAC-NEXT:  The 1 instructions that leak the affected registers are:
+// NOFPAC-NEXT:  1.     {{[0-9a-f]+}}:      b       bad_indirect_tailcall_untrusted_nocfg # TAILCALL
         paciasp
         stp     x29, x30, [sp, #-0x10]!
         adr     x3, 1f
@@ -441,11 +449,12 @@ bad_plt_tailcall_untrusted_nocfg:
 bad_indirect_tailcall_untrusted_nocfg:
 // Known false negative: ignoring UNKNOWN CONTROL FLOW without CFG.
 // Authentication oracle is found by a generic checker, though.
-// CHECK-NOT: untrusted link register{{.*}}bad_indirect_tailcall_untrusted_nocfg
-// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_indirect_tailcall_untrusted_nocfg, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
-// CHECK-NEXT:  The 0 instructions that leak the affected registers are:
-// CHECK-NOT: untrusted link register{{.*}}bad_indirect_tailcall_untrusted_nocfg
+// FPAC-NOT: bad_indirect_tailcall_untrusted_nocfg
+// NOFPAC-NOT: untrusted link register{{.*}}bad_indirect_tailcall_untrusted_nocfg
+// NOFPAC-LABEL: GS-PAUTH: authentication oracle found in function bad_indirect_tailcall_untrusted_nocfg, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
+// NOFPAC-NEXT:  The 0 instructions that leak the affected registers are:
+// NOFPAC-NOT: untrusted link register{{.*}}bad_indirect_tailcall_untrusted_nocfg
         paciasp
         stp     x29, x30, [sp, #-0x10]!
         adr     x3, 1f
@@ -515,19 +524,20 @@ good_indirect_tailcall_no_clobber_v83:
         .globl  bad_indirect_tailcall_untrusted_v83
         .type   bad_indirect_tailcall_untrusted_v83, at function
 bad_indirect_tailcall_untrusted_v83:
-// CHECK-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_indirect_tailcall_untrusted_v83, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      braa    x0, x1 # TAILCALL
-// CHECK-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
-// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_indirect_tailcall_untrusted_v83, basic block {{[^,]+}}, at address
-// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
-// CHECK-NEXT:  The 1 instructions that leak the affected registers are:
-// CHECK-NEXT:  1.     {{[0-9a-f]+}}:      braa    x0, x1 # TAILCALL
-// CHECK-NEXT:  This happens in the following basic block:
-// CHECK-NEXT:  {{[0-9a-f]+}}:   paciasp
-// CHECK-NEXT:  {{[0-9a-f]+}}:   stp     x29, x30, [sp, #-0x10]!
-// CHECK-NEXT:  {{[0-9a-f]+}}:   ldp     x29, x30, [sp], #0x10
-// CHECK-NEXT:  {{[0-9a-f]+}}:   autiasp
-// CHECK-NEXT:  {{[0-9a-f]+}}:   braa    x0, x1 # TAILCALL
+// FPAC-NOT: bad_indirect_tailcall_untrusted_v83
+// NOFPAC-LABEL: GS-PAUTH: untrusted link register found before tail call in function bad_indirect_tailcall_untrusted_v83, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      braa    x0, x1 # TAILCALL
+// NOFPAC-NEXT:  The 0 instructions that write to the affected registers after any authentication are:
+// NOFPAC-LABEL: GS-PAUTH: authentication oracle found in function bad_indirect_tailcall_untrusted_v83, basic block {{[^,]+}}, at address
+// NOFPAC-NEXT:  The instruction is     {{[0-9a-f]+}}:      autiasp
+// NOFPAC-NEXT:  The 1 instructions that leak the affected registers are:
+// NOFPAC-NEXT:  1.     {{[0-9a-f]+}}:      braa    x0, x1 # TAILCALL
+// NOFPAC-NEXT:  This happens in the following basic block:
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   paciasp
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   stp     x29, x30, [sp, #-0x10]!
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   ldp     x29, x30, [sp], #0x10
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   autiasp
+// NOFPAC-NEXT:  {{[0-9a-f]+}}:   braa    x0, x1 # TAILCALL
         paciasp
         stp     x29, x30, [sp, #-0x10]!
         ldp     x29, x30, [sp], #0x10

>From ec6e2148855a4f7490ea9678d666c186ecc4b2e0 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Wed, 1 Oct 2025 14:12:45 +0300
Subject: [PATCH 38/48] [BOLT] Gadget scanner: make use of C++17 features and
 LLVM helpers (#141665)

Perform trivial syntactical cleanups:

- make use of structured binding declarations
- use LLVM utility functions when appropriate
- omit braces around single expression inside single-line LLVM_DEBUG()

This patch is NFC aside from minor debug output changes.
---
 bolt/lib/Passes/PAuthGadgetScanner.cpp        | 60 +++++++++----------
 .../AArch64/gs-pauth-debug-output.s           | 14 ++---
 2 files changed, 35 insertions(+), 39 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 9d22d3c8c6cd7..01b350b2f11fe 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -88,8 +88,8 @@ class TrackedRegisters {
   TrackedRegisters(ArrayRef<MCPhysReg> RegsToTrack)
       : Registers(RegsToTrack),
         RegToIndexMapping(getMappingSize(RegsToTrack), NoIndex) {
-    for (unsigned I = 0; I < RegsToTrack.size(); ++I)
-      RegToIndexMapping[RegsToTrack[I]] = I;
+    for (auto [MappedIndex, Reg] : llvm::enumerate(RegsToTrack))
+      RegToIndexMapping[Reg] = MappedIndex;
   }
 
   ArrayRef<MCPhysReg> getRegisters() const { return Registers; }
@@ -203,9 +203,9 @@ struct SrcState {
 
     SafeToDerefRegs &= StateIn.SafeToDerefRegs;
     TrustedRegs &= StateIn.TrustedRegs;
-    for (unsigned I = 0; I < LastInstWritingReg.size(); ++I)
-      for (const MCInst *J : StateIn.LastInstWritingReg[I])
-        LastInstWritingReg[I].insert(J);
+    for (auto [ThisSet, OtherSet] :
+         llvm::zip_equal(LastInstWritingReg, StateIn.LastInstWritingReg))
+      ThisSet.insert_range(OtherSet);
     return *this;
   }
 
@@ -224,11 +224,9 @@ struct SrcState {
 static void printInstsShort(raw_ostream &OS,
                             ArrayRef<SetOfRelatedInsts> Insts) {
   OS << "Insts: ";
-  for (unsigned I = 0; I < Insts.size(); ++I) {
-    auto &Set = Insts[I];
+  for (auto [I, PtrSet] : llvm::enumerate(Insts)) {
     OS << "[" << I << "](";
-    for (const MCInst *MCInstP : Set)
-      OS << MCInstP << " ";
+    interleave(PtrSet, OS, " ");
     OS << ")";
   }
 }
@@ -416,8 +414,9 @@ class SrcSafetyAnalysis {
     // ... an address can be updated in a safe manner, producing the result
     // which is as trusted as the input address.
     if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Point)) {
-      if (Cur.SafeToDerefRegs[DstAndSrc->second])
-        Regs.push_back(DstAndSrc->first);
+      auto [DstReg, SrcReg] = *DstAndSrc;
+      if (Cur.SafeToDerefRegs[SrcReg])
+        Regs.push_back(DstReg);
     }
 
     // Make sure explicit checker sequence keeps register safe-to-dereference
@@ -469,8 +468,9 @@ class SrcSafetyAnalysis {
     // ... an address can be updated in a safe manner, producing the result
     // which is as trusted as the input address.
     if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Point)) {
-      if (Cur.TrustedRegs[DstAndSrc->second])
-        Regs.push_back(DstAndSrc->first);
+      auto [DstReg, SrcReg] = *DstAndSrc;
+      if (Cur.TrustedRegs[SrcReg])
+        Regs.push_back(DstReg);
     }
 
     return Regs;
@@ -865,9 +865,9 @@ struct DstState {
       return (*this = StateIn);
 
     CannotEscapeUnchecked &= StateIn.CannotEscapeUnchecked;
-    for (unsigned I = 0; I < FirstInstLeakingReg.size(); ++I)
-      for (const MCInst *J : StateIn.FirstInstLeakingReg[I])
-        FirstInstLeakingReg[I].insert(J);
+    for (auto [ThisSet, OtherSet] :
+         llvm::zip_equal(FirstInstLeakingReg, StateIn.FirstInstLeakingReg))
+      ThisSet.insert_range(OtherSet);
     return *this;
   }
 
@@ -1033,8 +1033,7 @@ class DstSafetyAnalysis {
 
     // ... an address can be updated in a safe manner, or
     if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Inst)) {
-      MCPhysReg DstReg, SrcReg;
-      std::tie(DstReg, SrcReg) = *DstAndSrc;
+      auto [DstReg, SrcReg] = *DstAndSrc;
       // Note that *all* registers containing the derived values must be safe,
       // both source and destination ones. No temporaries are supported at now.
       if (Cur.CannotEscapeUnchecked[SrcReg] &&
@@ -1074,7 +1073,7 @@ class DstSafetyAnalysis {
     // If this instruction terminates the program immediately, no
     // authentication oracles are possible past this point.
     if (BC.MIB->isTrap(Point)) {
-      LLVM_DEBUG({ traceInst(BC, "Trap instruction found", Point); });
+      LLVM_DEBUG(traceInst(BC, "Trap instruction found", Point));
       DstState Next(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters());
       Next.CannotEscapeUnchecked.set();
       return Next;
@@ -1249,7 +1248,7 @@ class CFGUnawareDstSafetyAnalysis : public DstSafetyAnalysis,
       // starting to analyze Inst.
       if (BC.MIB->isCall(Inst) || BC.MIB->isBranch(Inst) ||
           BC.MIB->isReturn(Inst)) {
-        LLVM_DEBUG({ traceInst(BC, "Control flow instruction", Inst); });
+        LLVM_DEBUG(traceInst(BC, "Control flow instruction", Inst));
         S = createUnsafeState();
       }
 
@@ -1394,7 +1393,7 @@ shouldReportUnsafeTailCall(const BinaryContext &BC, const BinaryFunction &BF,
   // such libc, ignore tail calls performed by ELF entry function.
   if (BC.StartFunctionAddress &&
       *BC.StartFunctionAddress == Inst.getFunction()->getAddress()) {
-    LLVM_DEBUG({ dbgs() << "  Skipping tail call in ELF entry function.\n"; });
+    LLVM_DEBUG(dbgs() << "  Skipping tail call in ELF entry function.\n");
     return std::nullopt;
   }
 
@@ -1468,7 +1467,7 @@ shouldReportAuthOracle(const BinaryContext &BC, const MCInstReference &Inst,
   });
 
   if (S.empty()) {
-    LLVM_DEBUG({ dbgs() << "    DstState is empty!\n"; });
+    LLVM_DEBUG(dbgs() << "    DstState is empty!\n");
     return make_generic_report(
         Inst, "Warning: no state computed for an authentication instruction "
               "(possibly unreachable)");
@@ -1495,7 +1494,7 @@ collectRegsToTrack(ArrayRef<PartialReport<MCPhysReg>> Reports) {
 void FunctionAnalysisContext::findUnsafeUses(
     SmallVector<PartialReport<MCPhysReg>> &Reports) {
   auto Analysis = SrcSafetyAnalysis::create(BF, AllocatorId, {});
-  LLVM_DEBUG({ dbgs() << "Running src register safety analysis...\n"; });
+  LLVM_DEBUG(dbgs() << "Running src register safety analysis...\n");
   Analysis->run();
   LLVM_DEBUG({
     dbgs() << "After src register safety analysis:\n";
@@ -1552,8 +1551,7 @@ void FunctionAnalysisContext::findUnsafeUses(
 
     const SrcState &S = Analysis->getStateBefore(Inst);
     if (S.empty()) {
-      LLVM_DEBUG(
-          { traceInst(BC, "Instruction has no state, skipping", Inst); });
+      LLVM_DEBUG(traceInst(BC, "Instruction has no state, skipping", Inst));
       assert(UnreachableBBReported && "Should be reported at least once");
       (void)UnreachableBBReported;
       return;
@@ -1580,8 +1578,7 @@ void FunctionAnalysisContext::augmentUnsafeUseReports(
   SmallVector<MCPhysReg> RegsToTrack = collectRegsToTrack(Reports);
   // Re-compute the analysis with register tracking.
   auto Analysis = SrcSafetyAnalysis::create(BF, AllocatorId, RegsToTrack);
-  LLVM_DEBUG(
-      { dbgs() << "\nRunning detailed src register safety analysis...\n"; });
+  LLVM_DEBUG(dbgs() << "\nRunning detailed src register safety analysis...\n");
   Analysis->run();
   LLVM_DEBUG({
     dbgs() << "After detailed src register safety analysis:\n";
@@ -1591,7 +1588,7 @@ void FunctionAnalysisContext::augmentUnsafeUseReports(
   // Augment gadget reports.
   for (auto &Report : Reports) {
     MCInstReference Location = Report.Issue->Location;
-    LLVM_DEBUG({ traceInst(BC, "Attaching clobbering info to", Location); });
+    LLVM_DEBUG(traceInst(BC, "Attaching clobbering info to", Location));
     assert(Report.RequestedDetails &&
            "Should be removed by handleSimpleReports");
     auto DetailedInfo =
@@ -1609,7 +1606,7 @@ void FunctionAnalysisContext::findUnsafeDefs(
     return;
 
   auto Analysis = DstSafetyAnalysis::create(BF, AllocatorId, {});
-  LLVM_DEBUG({ dbgs() << "Running dst register safety analysis...\n"; });
+  LLVM_DEBUG(dbgs() << "Running dst register safety analysis...\n");
   Analysis->run();
   LLVM_DEBUG({
     dbgs() << "After dst register safety analysis:\n";
@@ -1632,8 +1629,7 @@ void FunctionAnalysisContext::augmentUnsafeDefReports(
   SmallVector<MCPhysReg> RegsToTrack = collectRegsToTrack(Reports);
   // Re-compute the analysis with register tracking.
   auto Analysis = DstSafetyAnalysis::create(BF, AllocatorId, RegsToTrack);
-  LLVM_DEBUG(
-      { dbgs() << "\nRunning detailed dst register safety analysis...\n"; });
+  LLVM_DEBUG(dbgs() << "\nRunning detailed dst register safety analysis...\n");
   Analysis->run();
   LLVM_DEBUG({
     dbgs() << "After detailed dst register safety analysis:\n";
@@ -1643,7 +1639,7 @@ void FunctionAnalysisContext::augmentUnsafeDefReports(
   // Augment gadget reports.
   for (auto &Report : Reports) {
     MCInstReference Location = Report.Issue->Location;
-    LLVM_DEBUG({ traceInst(BC, "Attaching leakage info to", Location); });
+    LLVM_DEBUG(traceInst(BC, "Attaching leakage info to", Location));
     assert(Report.RequestedDetails &&
            "Should be removed by handleSimpleReports");
     auto DetailedInfo = std::make_shared<LeakageInfo>(
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
index ee8521ff1f810..a3ad7effe4b0d 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
@@ -177,9 +177,9 @@ clobber:
 // CHECK-EMPTY:
 // CHECK-NEXT: Running detailed src register safety analysis...
 // CHECK-NEXT:   SrcSafetyAnalysis::ComputeNext(   mov     w30, #0x0, src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: LR W30 W30_HI , Insts: [0]()>)
-// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: W30_HI , TrustedRegs: W30_HI , Insts: [0](0x{{[0-9a-f]+}} )>)
-// CHECK-NEXT:   SrcSafetyAnalysis::ComputeNext(   ret     x30, src-state<SafeToDerefRegs: W30_HI , TrustedRegs: W30_HI , Insts: [0](0x{{[0-9a-f]+}} )>)
-// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: W30_HI , TrustedRegs: W30_HI , Insts: [0](0x{{[0-9a-f]+}} )>)
+// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: W30_HI , TrustedRegs: W30_HI , Insts: [0](0x{{[0-9a-f]+}})>)
+// CHECK-NEXT:   SrcSafetyAnalysis::ComputeNext(   ret     x30, src-state<SafeToDerefRegs: W30_HI , TrustedRegs: W30_HI , Insts: [0](0x{{[0-9a-f]+}})>)
+// CHECK-NEXT:     .. result: (src-state<SafeToDerefRegs: W30_HI , TrustedRegs: W30_HI , Insts: [0](0x{{[0-9a-f]+}})>)
 // CHECK-NEXT: After detailed src register safety analysis:
 // CHECK-NEXT: Binary Function "clobber"  {
 // ...
@@ -189,7 +189,7 @@ clobber:
 // Iterating over the reports and attaching clobbering info:
 
 // CHECK-EMPTY:
-// CHECK-NEXT:   Attaching clobbering info to:     00000000:         ret # DataflowSrcSafetyAnalysis: src-state<SafeToDerefRegs: BitVector, TrustedRegs: BitVector, Insts: [0](0x{{[0-9a-f]+}} )>
+// CHECK-NEXT:   Attaching clobbering info to:     00000000:         ret # DataflowSrcSafetyAnalysis: src-state<SafeToDerefRegs: BitVector, TrustedRegs: BitVector, Insts: [0](0x{{[0-9a-f]+}})>
 
         .globl  nocfg
         .type   nocfg, at function
@@ -315,7 +315,7 @@ auth_oracle:
 // AUTH-ORACLES-NEXT:   DstSafetyAnalysis::ComputeNext(       ret     x30, dst-state<CannotEscapeUnchecked: , Insts: [0]()>)
 // AUTH-ORACLES-NEXT:     .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: [0]()>)
 // AUTH-ORACLES-NEXT:   DstSafetyAnalysis::ComputeNext(       autia   x0, x1, dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: [0]()>)
-// AUTH-ORACLES-NEXT:     .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: [0](0x{{[0-9a-f]+}} )>)
+// AUTH-ORACLES-NEXT:     .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: [0](0x{{[0-9a-f]+}})>)
 // AUTH-ORACLES-NEXT: After detailed dst register safety analysis:
 // AUTH-ORACLES-NEXT: Binary Function "auth_oracle"  {
 // AUTH-ORACLES-NEXT:   Number      : 4
@@ -325,14 +325,14 @@ auth_oracle:
 // AUTH-ORACLES-NEXT: }
 // AUTH-ORACLES-NEXT: [[BB0]] (2 instructions, align : 1)
 // AUTH-ORACLES-NEXT:   Entry Point
-// AUTH-ORACLES-NEXT:     00000000:   autia   x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0](0x{{[0-9a-f]+}} )>
+// AUTH-ORACLES-NEXT:     00000000:   autia   x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0](0x{{[0-9a-f]+}})>
 // AUTH-ORACLES-NEXT:     00000004:   ret # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0]()>
 // AUTH-ORACLES-EMPTY:
 // AUTH-ORACLES-NEXT: DWARF CFI Instructions:
 // AUTH-ORACLES-NEXT:     <empty>
 // AUTH-ORACLES-NEXT: End of Function "auth_oracle"
 // AUTH-ORACLES-EMPTY:
-// AUTH-ORACLES-NEXT:   Attaching leakage info to:     00000000:      autia   x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0](0x{{[0-9a-f]+}} )>
+// AUTH-ORACLES-NEXT:   Attaching leakage info to:     00000000:      autia   x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0](0x{{[0-9a-f]+}})>
 
 // Gadget scanner should not crash on CFI instructions, including when debug-printing them.
 // Note that the particular debug output is not checked, but BOLT should be

>From 56dc69f66f1fed09d6c252d7d71d1e5598827e40 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Thu, 21 Aug 2025 06:13:53 -0700
Subject: [PATCH 39/48] [MLIR] Apply clang-tidy fixes for
 performance-move-const-arg in SimplifyAffineMinMax.cpp (NFC)

---
 mlir/lib/Dialect/Affine/Transforms/SimplifyAffineMinMax.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineMinMax.cpp b/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineMinMax.cpp
index f3e065a12ded0..9821a75a55f49 100644
--- a/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineMinMax.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineMinMax.cpp
@@ -246,6 +246,6 @@ void SimplifyAffineMinMaxPass::runOnOperation() {
   patterns.add<SimplifyAffineMaxOp, SimplifyAffineMinOp, SimplifyAffineApplyOp>(
       func.getContext());
   FrozenRewritePatternSet frozenPatterns(std::move(patterns));
-  if (failed(applyPatternsGreedily(func, std::move(frozenPatterns))))
+  if (failed(applyPatternsGreedily(func, frozenPatterns)))
     return signalPassFailure();
 }

>From 28a150f9a1151a296c8432c4a90df221ffe4ec58 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 1 Oct 2025 12:41:03 +0100
Subject: [PATCH 40/48] [AArch64] Some tests for cbz/tbz with wzr. NFC

---
 llvm/test/CodeGen/AArch64/cbz_wzr.mir | 260 +++++++++++++++++++++
 llvm/test/CodeGen/AArch64/tbz-tbnz.ll | 324 +++++++++++++++++++++++++-
 2 files changed, 582 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/cbz_wzr.mir

diff --git a/llvm/test/CodeGen/AArch64/cbz_wzr.mir b/llvm/test/CodeGen/AArch64/cbz_wzr.mir
new file mode 100644
index 0000000000000..7deea56ba23a1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cbz_wzr.mir
@@ -0,0 +1,260 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -o - %s -mtriple=aarch64-none-eabi -run-pass=machine-cp -mcp-use-is-copy-instr | FileCheck %s
+
+---
+name:            cbz_wzr
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cbz_wzr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   CBZW $wzr, %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $w0 = MOVZWi 20, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  bb.0:
+    liveins: $x0
+
+    $w8 = ORRWrs $wzr, $wzr, 0
+    CBZW killed renamable $w8, %bb.2
+
+  bb.1:
+    $w0 = MOVZWi 10, 0
+    RET undef $lr, implicit $w0
+
+  bb.2:
+    $w0 = MOVZWi 20, 0
+    RET undef $lr, implicit $w0
+...
+---
+name:            cbnz_wzr
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cbnz_wzr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   CBNZW $wzr, %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $w0 = MOVZWi 20, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  bb.0:
+    liveins: $x0
+
+    $w8 = ORRWrs $wzr, $wzr, 0
+    CBNZW killed renamable $w8, %bb.2
+
+  bb.1:
+    $w0 = MOVZWi 10, 0
+    RET undef $lr, implicit $w0
+
+  bb.2:
+    $w0 = MOVZWi 20, 0
+    RET undef $lr, implicit $w0
+...
+---
+name:            tbz_wzr
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: tbz_wzr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   TBZW $wzr, 0, %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $w0 = MOVZWi 20, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  bb.0:
+    liveins: $x0
+
+    $w8 = ORRWrs $wzr, $wzr, 0
+    TBZW killed renamable $w8, 0, %bb.2
+
+  bb.1:
+    $w0 = MOVZWi 10, 0
+    RET undef $lr, implicit $w0
+
+  bb.2:
+    $w0 = MOVZWi 20, 0
+    RET undef $lr, implicit $w0
+...
+---
+name:            tbnz_wzr
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: tbnz_wzr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   TBNZW $wzr, 0, %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $w0 = MOVZWi 20, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  bb.0:
+    liveins: $x0
+
+    $w8 = ORRWrs $wzr, $wzr, 0
+    TBNZW killed renamable $w8, 0, %bb.2
+
+  bb.1:
+    $w0 = MOVZWi 10, 0
+    RET undef $lr, implicit $w0
+
+  bb.2:
+    $w0 = MOVZWi 20, 0
+    RET undef $lr, implicit $w0
+...
+
+---
+name:            cbz_xzr
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cbz_xzr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   CBZX $xzr, %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $w0 = MOVZWi 20, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  bb.0:
+    liveins: $x0
+
+    $x8 = ORRXrs $xzr, $xzr, 0
+    CBZX killed renamable $x8, %bb.2
+
+  bb.1:
+    $w0 = MOVZWi 10, 0
+    RET undef $lr, implicit $w0
+
+  bb.2:
+    $w0 = MOVZWi 20, 0
+    RET undef $lr, implicit $w0
+...
+---
+name:            cbnz_xzr
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cbnz_xzr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   CBNZX $xzr, %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $w0 = MOVZWi 20, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  bb.0:
+    liveins: $x0
+
+    $x8 = ORRXrs $xzr, $xzr, 0
+    CBNZX killed renamable $x8, %bb.2
+
+  bb.1:
+    $w0 = MOVZWi 10, 0
+    RET undef $lr, implicit $w0
+
+  bb.2:
+    $w0 = MOVZWi 20, 0
+    RET undef $lr, implicit $w0
+...
+---
+name:            tbz_xzr
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: tbz_xzr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   TBZX $xzr, 0, %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $w0 = MOVZWi 20, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  bb.0:
+    liveins: $x0
+
+    $x8 = ORRXrs $xzr, $xzr, 0
+    TBZX killed renamable $x8, 0, %bb.2
+
+  bb.1:
+    $w0 = MOVZWi 10, 0
+    RET undef $lr, implicit $w0
+
+  bb.2:
+    $w0 = MOVZWi 20, 0
+    RET undef $lr, implicit $w0
+...
+---
+name:            tbnz_xzr
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: tbnz_xzr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   TBNZX $xzr, 0, %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   $w0 = MOVZWi 10, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $w0 = MOVZWi 20, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0
+  bb.0:
+    liveins: $x0
+
+    $x8 = ORRXrs $xzr, $xzr, 0
+    TBNZX killed renamable $x8, 0, %bb.2
+
+  bb.1:
+    $w0 = MOVZWi 10, 0
+    RET undef $lr, implicit $w0
+
+  bb.2:
+    $w0 = MOVZWi 20, 0
+    RET undef $lr, implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
index 4a04934971711..6946cc23d867d 100644
--- a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
+++ b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=aarch64 -O3 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64 -O3 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 declare void @t()
 
@@ -581,3 +581,323 @@ end:
   ret void
 }
 
+define ptr @tbnz_wzr(i1 %cmp1.not.i, ptr %locflg) {
+; CHECK-SD-LABEL: tbnz_wzr:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    tbz w0, #0, .LBB20_2
+; CHECK-SD-NEXT:  // %bb.1:
+; CHECK-SD-NEXT:    tbnz wzr, #0, .LBB20_3
+; CHECK-SD-NEXT:    b .LBB20_4
+; CHECK-SD-NEXT:  .LBB20_2: // %opnfil.exit.thread
+; CHECK-SD-NEXT:    mov w8, #1 // =0x1
+; CHECK-SD-NEXT:    str wzr, [x1]
+; CHECK-SD-NEXT:    tbz w8, #0, .LBB20_4
+; CHECK-SD-NEXT:  .LBB20_3: // %if.else25
+; CHECK-SD-NEXT:    str wzr, [x1]
+; CHECK-SD-NEXT:  .LBB20_4: // %common.ret
+; CHECK-SD-NEXT:    mov x0, xzr
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbnz_wzr:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w8, #0 // =0x0
+; CHECK-GI-NEXT:    tbz w0, #0, .LBB20_3
+; CHECK-GI-NEXT:  // %bb.1: // %if.end10
+; CHECK-GI-NEXT:    tbnz w8, #0, .LBB20_4
+; CHECK-GI-NEXT:  .LBB20_2: // %common.ret
+; CHECK-GI-NEXT:    mov x0, xzr
+; CHECK-GI-NEXT:    ret
+; CHECK-GI-NEXT:  .LBB20_3: // %opnfil.exit.thread
+; CHECK-GI-NEXT:    mov w8, #1 // =0x1
+; CHECK-GI-NEXT:    str wzr, [x1]
+; CHECK-GI-NEXT:    tbz w8, #0, .LBB20_2
+; CHECK-GI-NEXT:  .LBB20_4: // %if.else25
+; CHECK-GI-NEXT:    str wzr, [x1]
+; CHECK-GI-NEXT:    mov x0, xzr
+; CHECK-GI-NEXT:    ret
+entry:
+  br i1 %cmp1.not.i, label %if.end10, label %opnfil.exit.thread
+
+opnfil.exit.thread:                               ; preds = %entry
+  store i32 0, ptr %locflg, align 4
+  br label %if.end10
+
+if.end10:                                         ; preds = %opnfil.exit.thread, %entry
+  %cmp5 = phi i1 [ true, %opnfil.exit.thread ], [ false, %entry ]
+  br i1 %cmp5, label %if.else25, label %if.then12
+
+if.then12:                                        ; preds = %if.end10
+  %call20 = load i32, ptr null, align 4
+  br label %if.end26
+
+if.else25:                                        ; preds = %if.end10
+  store i32 0, ptr %locflg, align 4
+  br label %if.end26
+
+if.end26:                                         ; preds = %if.else25, %if.then12
+  br i1 %cmp5, label %common.ret, label %if.then28
+
+common.ret:                                       ; preds = %if.then28, %if.end26
+  %common.ret.op = phi ptr [ null, %if.then28 ], [ null, %if.end26 ]
+  ret ptr %common.ret.op
+
+if.then28:                                        ; preds = %if.end26
+  %0 = load ptr, ptr null, align 8
+  br label %common.ret
+}
+
+define ptr @tbz_wzr(i1 %cmp1.not.i, ptr %locflg) {
+; CHECK-SD-LABEL: tbz_wzr:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    tbz w0, #0, .LBB21_2
+; CHECK-SD-NEXT:  // %bb.1:
+; CHECK-SD-NEXT:    mov w8, #1 // =0x1
+; CHECK-SD-NEXT:    tbnz w8, #0, .LBB21_3
+; CHECK-SD-NEXT:    b .LBB21_4
+; CHECK-SD-NEXT:  .LBB21_2: // %opnfil.exit.thread
+; CHECK-SD-NEXT:    str wzr, [x1]
+; CHECK-SD-NEXT:    tbz wzr, #0, .LBB21_4
+; CHECK-SD-NEXT:  .LBB21_3: // %if.else25
+; CHECK-SD-NEXT:    str wzr, [x1]
+; CHECK-SD-NEXT:  .LBB21_4: // %common.ret
+; CHECK-SD-NEXT:    mov x0, xzr
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbz_wzr:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w8, #1 // =0x1
+; CHECK-GI-NEXT:    tbz w0, #0, .LBB21_3
+; CHECK-GI-NEXT:  // %bb.1: // %if.end10
+; CHECK-GI-NEXT:    tbnz w8, #0, .LBB21_4
+; CHECK-GI-NEXT:  .LBB21_2: // %common.ret
+; CHECK-GI-NEXT:    mov x0, xzr
+; CHECK-GI-NEXT:    ret
+; CHECK-GI-NEXT:  .LBB21_3: // %opnfil.exit.thread
+; CHECK-GI-NEXT:    mov w8, #0 // =0x0
+; CHECK-GI-NEXT:    str wzr, [x1]
+; CHECK-GI-NEXT:    tbz w8, #0, .LBB21_2
+; CHECK-GI-NEXT:  .LBB21_4: // %if.else25
+; CHECK-GI-NEXT:    str wzr, [x1]
+; CHECK-GI-NEXT:    mov x0, xzr
+; CHECK-GI-NEXT:    ret
+entry:
+  br i1 %cmp1.not.i, label %if.end10, label %opnfil.exit.thread
+
+opnfil.exit.thread:                               ; preds = %entry
+  store i32 0, ptr %locflg, align 4
+  br label %if.end10
+
+if.end10:                                         ; preds = %opnfil.exit.thread, %entry
+  %cmp5 = phi i1 [ false, %opnfil.exit.thread ], [ true, %entry ]
+  br i1 %cmp5, label %if.else25, label %if.then12
+
+if.then12:                                        ; preds = %if.end10
+  %call20 = load i32, ptr null, align 4
+  br label %if.end26
+
+if.else25:                                        ; preds = %if.end10
+  store i32 0, ptr %locflg, align 4
+  br label %if.end26
+
+if.end26:                                         ; preds = %if.else25, %if.then12
+  br i1 %cmp5, label %common.ret, label %if.then28
+
+common.ret:                                       ; preds = %if.then28, %if.end26
+  %common.ret.op = phi ptr [ null, %if.then28 ], [ null, %if.end26 ]
+  ret ptr %common.ret.op
+
+if.then28:                                        ; preds = %if.end26
+  %0 = load ptr, ptr null, align 8
+  br label %common.ret
+}
+
+define ptr @cbnz_wzr(i1 %cmp1.not.i, ptr %locflg) {
+; CHECK-SD-LABEL: cbnz_wzr:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    tbz w0, #0, .LBB22_2
+; CHECK-SD-NEXT:  // %bb.1:
+; CHECK-SD-NEXT:    cbnz wzr, .LBB22_3
+; CHECK-SD-NEXT:    b .LBB22_4
+; CHECK-SD-NEXT:  .LBB22_2: // %opnfil.exit.thread
+; CHECK-SD-NEXT:    mov w8, #10 // =0xa
+; CHECK-SD-NEXT:    str wzr, [x1]
+; CHECK-SD-NEXT:    cbz w8, .LBB22_4
+; CHECK-SD-NEXT:  .LBB22_3: // %if.else25
+; CHECK-SD-NEXT:    str wzr, [x1]
+; CHECK-SD-NEXT:  .LBB22_4: // %common.ret
+; CHECK-SD-NEXT:    mov x0, xzr
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: cbnz_wzr:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w8, wzr
+; CHECK-GI-NEXT:    tbnz w0, #0, .LBB22_2
+; CHECK-GI-NEXT:  // %bb.1: // %opnfil.exit.thread
+; CHECK-GI-NEXT:    mov w8, #10 // =0xa
+; CHECK-GI-NEXT:    str wzr, [x1]
+; CHECK-GI-NEXT:  .LBB22_2: // %if.end10
+; CHECK-GI-NEXT:    cbz w8, .LBB22_4
+; CHECK-GI-NEXT:  // %bb.3: // %if.else25
+; CHECK-GI-NEXT:    str wzr, [x1]
+; CHECK-GI-NEXT:  .LBB22_4: // %common.ret
+; CHECK-GI-NEXT:    mov x0, xzr
+; CHECK-GI-NEXT:    ret
+entry:
+  br i1 %cmp1.not.i, label %if.end10, label %opnfil.exit.thread
+
+opnfil.exit.thread:                               ; preds = %entry
+  store i32 0, ptr %locflg, align 4
+  br label %if.end10
+
+if.end10:                                         ; preds = %opnfil.exit.thread, %entry
+  %cmp5 = phi i32 [ 10, %opnfil.exit.thread ], [ 0, %entry ]
+  %cmp5b = icmp ne i32 %cmp5, 0
+  br i1 %cmp5b, label %if.else25, label %if.then12
+
+if.then12:                                        ; preds = %if.end10
+  %call20 = load i32, ptr null, align 4
+  br label %if.end26
+
+if.else25:                                        ; preds = %if.end10
+  store i32 0, ptr %locflg, align 4
+  br label %if.end26
+
+if.end26:                                         ; preds = %if.else25, %if.then12
+  br i1 %cmp5b, label %common.ret, label %if.then28
+
+common.ret:                                       ; preds = %if.then28, %if.end26
+  %common.ret.op = phi ptr [ null, %if.then28 ], [ null, %if.end26 ]
+  ret ptr %common.ret.op
+
+if.then28:                                        ; preds = %if.end26
+  %0 = load ptr, ptr null, align 8
+  br label %common.ret
+}
+
+define ptr @cbz_wzr(i1 %cmp1.not.i, ptr %locflg) {
+; CHECK-SD-LABEL: cbz_wzr:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    tbz w0, #0, .LBB23_2
+; CHECK-SD-NEXT:  // %bb.1:
+; CHECK-SD-NEXT:    mov w8, #10 // =0xa
+; CHECK-SD-NEXT:    cbnz w8, .LBB23_3
+; CHECK-SD-NEXT:    b .LBB23_4
+; CHECK-SD-NEXT:  .LBB23_2: // %opnfil.exit.thread
+; CHECK-SD-NEXT:    str wzr, [x1]
+; CHECK-SD-NEXT:    cbz wzr, .LBB23_4
+; CHECK-SD-NEXT:  .LBB23_3: // %if.else25
+; CHECK-SD-NEXT:    str wzr, [x1]
+; CHECK-SD-NEXT:  .LBB23_4: // %common.ret
+; CHECK-SD-NEXT:    mov x0, xzr
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: cbz_wzr:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w8, #10 // =0xa
+; CHECK-GI-NEXT:    tbnz w0, #0, .LBB23_2
+; CHECK-GI-NEXT:  // %bb.1: // %opnfil.exit.thread
+; CHECK-GI-NEXT:    mov w8, wzr
+; CHECK-GI-NEXT:    str wzr, [x1]
+; CHECK-GI-NEXT:  .LBB23_2: // %if.end10
+; CHECK-GI-NEXT:    cbz w8, .LBB23_4
+; CHECK-GI-NEXT:  // %bb.3: // %if.else25
+; CHECK-GI-NEXT:    str wzr, [x1]
+; CHECK-GI-NEXT:  .LBB23_4: // %common.ret
+; CHECK-GI-NEXT:    mov x0, xzr
+; CHECK-GI-NEXT:    ret
+entry:
+  br i1 %cmp1.not.i, label %if.end10, label %opnfil.exit.thread
+
+opnfil.exit.thread:                               ; preds = %entry
+  store i32 0, ptr %locflg, align 4
+  br label %if.end10
+
+if.end10:                                         ; preds = %opnfil.exit.thread, %entry
+  %cmp5 = phi i32 [ 0, %opnfil.exit.thread ], [ 10, %entry ]
+  %cmp5b = icmp ne i32 %cmp5, 0
+  br i1 %cmp5b, label %if.else25, label %if.then12
+
+if.then12:                                        ; preds = %if.end10
+  %call20 = load i32, ptr null, align 4
+  br label %if.end26
+
+if.else25:                                        ; preds = %if.end10
+  store i32 0, ptr %locflg, align 4
+  br label %if.end26
+
+if.end26:                                         ; preds = %if.else25, %if.then12
+  br i1 %cmp5b, label %common.ret, label %if.then28
+
+common.ret:                                       ; preds = %if.then28, %if.end26
+  %common.ret.op = phi ptr [ null, %if.then28 ], [ null, %if.end26 ]
+  ret ptr %common.ret.op
+
+if.then28:                                        ; preds = %if.end26
+  %0 = load ptr, ptr null, align 8
+  br label %common.ret
+}
+
+define i1 @avifSequenceHeaderParse() {
+; CHECK-SD-LABEL: avifSequenceHeaderParse:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    mov w8, #1 // =0x1
+; CHECK-SD-NEXT:    cbz w8, .LBB24_2
+; CHECK-SD-NEXT:  .LBB24_1: // %bb6
+; CHECK-SD-NEXT:    mov w0, wzr
+; CHECK-SD-NEXT:    ret
+; CHECK-SD-NEXT:  .LBB24_2: // %bb1
+; CHECK-SD-NEXT:    cbz w8, .LBB24_4
+; CHECK-SD-NEXT:  // %bb.3:
+; CHECK-SD-NEXT:    tbz xzr, #63, .LBB24_1
+; CHECK-SD-NEXT:    b .LBB24_5
+; CHECK-SD-NEXT:  .LBB24_4: // %bb2
+; CHECK-SD-NEXT:    mov w8, #1 // =0x1
+; CHECK-SD-NEXT:    tbz x8, #63, .LBB24_1
+; CHECK-SD-NEXT:  .LBB24_5: // %bb4
+; CHECK-SD-NEXT:    mov w8, #1 // =0x1
+; CHECK-SD-NEXT:    mov w0, wzr
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: avifSequenceHeaderParse:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w0, wzr
+; CHECK-GI-NEXT:    mov w8, #1 // =0x1
+; CHECK-GI-NEXT:    ret
+entry:
+  %a = icmp slt i64 0, 0
+  br i1 %a, label %bb1, label %bb6
+
+bb1:                                 ; preds = %entry
+  %b = icmp eq i32 1, 0
+  br i1 %b, label %bb2, label %bb3
+
+bb2:                                  ; preds = %bb1
+  %c = load i8, ptr null, align 1
+  %d = zext i8 1 to i64
+  %e = shl i64 %d, 0
+  br label %bb3
+
+bb3:                            ; preds = %bb2, %bb1
+  %f = phi i64 [ %e, %bb2 ], [ 0, %bb1 ]
+  %g = icmp slt i64 %f, 0
+  br i1 %g, label %bb4, label %bb6
+
+bb4:                                 ; preds = %bb3
+  %h = icmp eq i32 1, 0
+  br i1 %h, label %bb5, label %bb7
+
+bb5:                                  ; preds = %bb4
+  %i = load i8, ptr null, align 1
+  %j = shl i64 0, 0
+  br label %bb7
+
+bb6:                                      ; preds = %bb7, %bb3, %entry
+  %k = phi i1 [ false, %bb7 ], [ false, %bb3 ], [ false, %entry ]
+  ret i1 %k
+
+bb7:                            ; preds = %bb5, %bb4
+  %l = phi ptr [ inttoptr (i64 1 to ptr), %bb5 ], [ null, %bb4 ]
+  %m = phi i64 [ %j, %bb5 ], [ 0, %bb4 ]
+  %n = icmp ult ptr %l, null
+  br label %bb6
+}

>From a7c5eddf02baf96efc09c2612e2bfd2ff8dcbbed Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Thu, 21 Aug 2025 05:05:47 -0700
Subject: [PATCH 41/48] [MLIR] Apply clang-tidy fixes for
 performance-unnecessary-value-param in Rewrite.cpp (NFC)

---
 mlir/lib/Bindings/Python/Rewrite.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Bindings/Python/Rewrite.cpp b/mlir/lib/Bindings/Python/Rewrite.cpp
index f18298ecaf415..836f44fd7d4be 100644
--- a/mlir/lib/Bindings/Python/Rewrite.cpp
+++ b/mlir/lib/Bindings/Python/Rewrite.cpp
@@ -127,7 +127,7 @@ class PyFrozenRewritePatternSet {
         mlirPythonFrozenRewritePatternSetToCapsule(get()));
   }
 
-  static nb::object createFromCapsule(nb::object capsule) {
+  static nb::object createFromCapsule(const nb::object &capsule) {
     MlirFrozenRewritePatternSet rawPm =
         mlirPythonCapsuleToFrozenRewritePatternSet(capsule.ptr());
     if (rawPm.ptr == nullptr)

>From 8f6a90857e2f834570cabf04fb41a5774138b5d5 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Thu, 21 Aug 2025 10:29:19 -0700
Subject: [PATCH 42/48] [MLIR] Apply clang-tidy fixes for
 performance-unnecessary-copy-initialization in InferIntRangeCommon.cpp (NFC)

---
 mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp b/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp
index af4ea5ac1cec8..0f28cbc751c1c 100644
--- a/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp
+++ b/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp
@@ -304,7 +304,7 @@ static ConstantIntRanges inferDivURange(const ConstantIntRanges &lhs,
     umin = lhsMin.udiv(rhsMax);
 
   // X u/ Y u<= X.
-  APInt umax = lhsMax;
+  const APInt &umax = lhsMax;
   return ConstantIntRanges::fromUnsigned(umin, umax);
 }
 

>From bc51fbd779bee72f05618ea21d804b919cc1b25a Mon Sep 17 00:00:00 2001
From: Timm Baeder <tbaeder at redhat.com>
Date: Wed, 1 Oct 2025 13:56:38 +0200
Subject: [PATCH 43/48] [clang][bytecode] Fix integral cast edge case (#161506)

We were converting the `APSInt` to a sign-less `APInt` too early and
losing the sign information.
---
 clang/lib/AST/ByteCode/Compiler.cpp  | 12 ++++++++++--
 clang/test/AST/ByteCode/literals.cpp |  2 ++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 0b7b6cd64dd97..c71fd22fe9d7e 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -540,7 +540,8 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
     if (const auto *IL = dyn_cast<IntegerLiteral>(SubExpr)) {
       if (ToT != PT_IntAP && ToT != PT_IntAPS && FromT != PT_IntAP &&
           FromT != PT_IntAPS && !CE->getType()->isEnumeralType())
-        return this->emitConst(IL->getValue(), CE);
+        return this->emitConst(APSInt(IL->getValue(), !isSignedType(*FromT)),
+                               CE);
       if (!this->emitConst(IL->getValue(), SubExpr))
         return false;
     } else {
@@ -4541,7 +4542,14 @@ bool Compiler<Emitter>::emitConst(T Value, const Expr *E) {
 template <class Emitter>
 bool Compiler<Emitter>::emitConst(const APSInt &Value, PrimType Ty,
                                   const Expr *E) {
-  return this->emitConst(static_cast<const APInt &>(Value), Ty, E);
+  if (Ty == PT_IntAPS)
+    return this->emitConstIntAPS(Value, E);
+  if (Ty == PT_IntAP)
+    return this->emitConstIntAP(Value, E);
+
+  if (Value.isSigned())
+    return this->emitConst(Value.getSExtValue(), Ty, E);
+  return this->emitConst(Value.getZExtValue(), Ty, E);
 }
 
 template <class Emitter>
diff --git a/clang/test/AST/ByteCode/literals.cpp b/clang/test/AST/ByteCode/literals.cpp
index 5bc3f7f4c815c..5028ebfa3de30 100644
--- a/clang/test/AST/ByteCode/literals.cpp
+++ b/clang/test/AST/ByteCode/literals.cpp
@@ -28,6 +28,8 @@ static_assert(number != 10, ""); // both-error{{failed}} \
 static_assert(__objc_yes, "");
 static_assert(!__objc_no, "");
 
+static_assert((long long)0x00000000FFFF0000 == 4294901760, "");
+
 constexpr bool b = number;
 static_assert(b, "");
 constexpr int one = true;

>From 3e55bb2d8551eba5077680fdb20c06e61294d8e1 Mon Sep 17 00:00:00 2001
From: Ivan Butygin <ivan.butygin at gmail.com>
Date: Wed, 1 Oct 2025 15:01:37 +0300
Subject: [PATCH 44/48] [mlir][memref] Introduce `memref.distinct_objects` op
 (#156913)

The `distinct_objects` operation takes a list of memrefs and returns a
list of memrefs of the same types, with the additional assumption that
accesses to these memrefs will never alias with each other. This means
that loads and stores to different memrefs in the list can be safely
reordered.

See the RFC discussion:
https://discourse.llvm.org/t/rfc-introducing-memref-aliasing-attributes/88049
---
 .../mlir/Dialect/MemRef/IR/MemRefOps.td       | 39 +++++++++++++-
 .../Conversion/MemRefToLLVM/MemRefToLLVM.cpp  | 52 +++++++++++++++++--
 mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp      | 23 ++++++++
 .../MemRefToLLVM/memref-to-llvm.mlir          | 30 +++++++++++
 mlir/test/Dialect/MemRef/invalid.mlir         | 16 ++++++
 mlir/test/Dialect/MemRef/ops.mlir             |  9 ++++
 6 files changed, 164 insertions(+), 5 deletions(-)

diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
index 2bf953e32ccce..d4d67bfb278d5 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
@@ -155,7 +155,7 @@ def AssumeAlignmentOp : MemRef_Op<"assume_alignment", [
       The `assume_alignment` operation takes a memref and an integer alignment
       value. It returns a new SSA value of the same memref type, but associated
       with the assumption that the underlying buffer is aligned to the given
-      alignment. 
+      alignment.
 
       If the buffer isn't aligned to the given alignment, its result is poison.
       This operation doesn't affect the semantics of a program where the
@@ -170,7 +170,7 @@ def AssumeAlignmentOp : MemRef_Op<"assume_alignment", [
   let assemblyFormat = "$memref `,` $alignment attr-dict `:` type($memref)";
   let extraClassDeclaration = [{
     MemRefType getType() { return ::llvm::cast<MemRefType>(getResult().getType()); }
-    
+
     Value getViewSource() { return getMemref(); }
   }];
 
@@ -178,6 +178,41 @@ def AssumeAlignmentOp : MemRef_Op<"assume_alignment", [
   let hasFolder = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// DistinctObjectsOp
+//===----------------------------------------------------------------------===//
+
+def DistinctObjectsOp : MemRef_Op<"distinct_objects", [
+      Pure,
+      DeclareOpInterfaceMethods<InferTypeOpInterface>
+      // ViewLikeOpInterface TODO: ViewLikeOpInterface only supports a single argument
+    ]> {
+  let summary = "assumption that acesses to specific memrefs will never alias";
+  let description = [{
+      The `distinct_objects` operation takes a list of memrefs and returns the same
+      memrefs, with the additional assumption that accesses to them will never
+      alias with each other. This means that loads and stores to different
+      memrefs in the list can be safely reordered.
+
+      If the memrefs do alias, the load/store behavior is undefined. This
+      operation doesn't affect the semantics of a valid program. It is
+      intended for optimization purposes, allowing the compiler to generate more
+      efficient code based on the non-aliasing assumption. The optimization is
+      best-effort.
+
+      Example:
+
+      ```mlir
+      %1, %2 = memref.distinct_objects %a, %b : memref<?xf32>, memref<?xf32>
+      ```
+  }];
+  let arguments = (ins Variadic<AnyMemRef>:$operands);
+  let results = (outs Variadic<AnyMemRef>:$results);
+
+  let assemblyFormat = "$operands attr-dict `:` type($operands)";
+  let hasVerifier = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // AllocOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
index cc6314cbd1ffe..a6f816aa07377 100644
--- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
+++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
@@ -465,6 +465,51 @@ struct AssumeAlignmentOpLowering
   }
 };
 
+struct DistinctObjectsOpLowering
+    : public ConvertOpToLLVMPattern<memref::DistinctObjectsOp> {
+  using ConvertOpToLLVMPattern<
+      memref::DistinctObjectsOp>::ConvertOpToLLVMPattern;
+  explicit DistinctObjectsOpLowering(const LLVMTypeConverter &converter)
+      : ConvertOpToLLVMPattern<memref::DistinctObjectsOp>(converter) {}
+
+  LogicalResult
+  matchAndRewrite(memref::DistinctObjectsOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    ValueRange operands = adaptor.getOperands();
+    if (operands.size() <= 1) {
+      // Fast path.
+      rewriter.replaceOp(op, operands);
+      return success();
+    }
+
+    Location loc = op.getLoc();
+    SmallVector<Value> ptrs;
+    for (auto [origOperand, newOperand] :
+         llvm::zip_equal(op.getOperands(), operands)) {
+      auto memrefType = cast<MemRefType>(origOperand.getType());
+      MemRefDescriptor memRefDescriptor(newOperand);
+      Value ptr = memRefDescriptor.bufferPtr(rewriter, loc, *getTypeConverter(),
+                                             memrefType);
+      ptrs.push_back(ptr);
+    }
+
+    auto cond =
+        LLVM::ConstantOp::create(rewriter, loc, rewriter.getI1Type(), 1);
+    // Generate separate_storage assumptions for each pair of pointers.
+    for (auto i : llvm::seq<size_t>(ptrs.size() - 1)) {
+      for (auto j : llvm::seq<size_t>(i + 1, ptrs.size())) {
+        Value ptr1 = ptrs[i];
+        Value ptr2 = ptrs[j];
+        LLVM::AssumeOp::create(rewriter, loc, cond,
+                               LLVM::AssumeSeparateStorageTag{}, ptr1, ptr2);
+      }
+    }
+
+    rewriter.replaceOp(op, operands);
+    return success();
+  }
+};
+
 // A `dealloc` is converted into a call to `free` on the underlying data buffer.
 // The memref descriptor being an SSA value, there is no need to clean it up
 // in any way.
@@ -1997,22 +2042,23 @@ void mlir::populateFinalizeMemRefToLLVMConversionPatterns(
   patterns.add<
       AllocaOpLowering,
       AllocaScopeOpLowering,
-      AtomicRMWOpLowering,
       AssumeAlignmentOpLowering,
+      AtomicRMWOpLowering,
       ConvertExtractAlignedPointerAsIndex,
       DimOpLowering,
+      DistinctObjectsOpLowering,
       ExtractStridedMetadataOpLowering,
       GenericAtomicRMWOpLowering,
       GetGlobalMemrefOpLowering,
       LoadOpLowering,
       MemRefCastOpLowering,
-      MemorySpaceCastOpLowering,
       MemRefReinterpretCastOpLowering,
       MemRefReshapeOpLowering,
+      MemorySpaceCastOpLowering,
       PrefetchOpLowering,
       RankOpLowering,
-      ReassociatingReshapeOpConversion<memref::ExpandShapeOp>,
       ReassociatingReshapeOpConversion<memref::CollapseShapeOp>,
+      ReassociatingReshapeOpConversion<memref::ExpandShapeOp>,
       StoreOpLowering,
       SubViewOpLowering,
       TransposeOpLowering,
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index 349b4deb29023..e9bdcda296da5 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -606,6 +606,29 @@ AssumeAlignmentOp::bubbleDownCasts(OpBuilder &builder) {
   return bubbleDownCastsPassthroughOpImpl(*this, builder, getMemrefMutable());
 }
 
+//===----------------------------------------------------------------------===//
+// DistinctObjectsOp
+//===----------------------------------------------------------------------===//
+
+LogicalResult DistinctObjectsOp::verify() {
+  if (getOperandTypes() != getResultTypes())
+    return emitOpError("operand types and result types must match");
+
+  if (getOperandTypes().empty())
+    return emitOpError("expected at least one operand");
+
+  return success();
+}
+
+LogicalResult DistinctObjectsOp::inferReturnTypes(
+    MLIRContext * /*context*/, std::optional<Location> /*location*/,
+    ValueRange operands, DictionaryAttr /*attributes*/,
+    OpaqueProperties /*properties*/, RegionRange /*regions*/,
+    SmallVectorImpl<Type> &inferredReturnTypes) {
+  llvm::copy(operands.getTypes(), std::back_inserter(inferredReturnTypes));
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // CastOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
index 45b1a1f1ca40c..0cbe064572911 100644
--- a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
+++ b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
@@ -195,6 +195,36 @@ func.func @assume_alignment(%0 : memref<4x4xf16>) {
 
 // -----
 
+// ALL-LABEL: func @distinct_objects
+//  ALL-SAME:   (%[[ARG0:.*]]: memref<?xf16>, %[[ARG1:.*]]: memref<?xf32>, %[[ARG2:.*]]: memref<?xf64>)
+func.func @distinct_objects(%arg0: memref<?xf16>, %arg1: memref<?xf32>, %arg2: memref<?xf64>) -> (memref<?xf16>, memref<?xf32>, memref<?xf64>) {
+//   ALL-DAG:   %[[CAST_0:.*]] = builtin.unrealized_conversion_cast %[[ARG0]] : memref<?xf16> to !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+//   ALL-DAG:   %[[CAST_1:.*]] = builtin.unrealized_conversion_cast %[[ARG1]] : memref<?xf32> to !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+//   ALL-DAG:   %[[CAST_2:.*]] = builtin.unrealized_conversion_cast %[[ARG2]] : memref<?xf64> to !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+//       ALL:   %[[PTR_0:.*]] = llvm.extractvalue %[[CAST_0]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+//       ALL:   %[[PTR_1:.*]] = llvm.extractvalue %[[CAST_1]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+//       ALL:   %[[PTR_2:.*]] = llvm.extractvalue %[[CAST_2]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+//       ALL:   %[[TRUE:.*]] = llvm.mlir.constant(true) : i1
+//       ALL:   llvm.intr.assume %[[TRUE]] ["separate_storage"(%[[PTR_0]], %[[PTR_1]] : !llvm.ptr, !llvm.ptr)] : i1
+//       ALL:   llvm.intr.assume %[[TRUE]] ["separate_storage"(%[[PTR_0]], %[[PTR_2]] : !llvm.ptr, !llvm.ptr)] : i1
+//       ALL:   llvm.intr.assume %[[TRUE]] ["separate_storage"(%[[PTR_1]], %[[PTR_2]] : !llvm.ptr, !llvm.ptr)] : i1
+  %1, %2, %3 = memref.distinct_objects %arg0, %arg1, %arg2 : memref<?xf16>, memref<?xf32>, memref<?xf64>
+  return %1, %2, %3 : memref<?xf16>, memref<?xf32>, memref<?xf64>
+}
+
+// -----
+
+// ALL-LABEL: func @distinct_objects_noop
+//  ALL-SAME:   (%[[ARG0:.*]]: memref<?xf16>)
+func.func @distinct_objects_noop(%arg0: memref<?xf16>) -> memref<?xf16> {
+// 1-operand version is noop
+//  ALL-NEXT:   return %[[ARG0]]
+  %1 = memref.distinct_objects %arg0 : memref<?xf16>
+  return %1 : memref<?xf16>
+}
+
+// -----
+
 // CHECK-LABEL: func @assume_alignment_w_offset
 // CHECK-INTERFACE-LABEL: func @assume_alignment_w_offset
 func.func @assume_alignment_w_offset(%0 : memref<4x4xf16, strided<[?, ?], offset: ?>>) {
diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir
index 3f96d907632b7..5ff292058ccc1 100644
--- a/mlir/test/Dialect/MemRef/invalid.mlir
+++ b/mlir/test/Dialect/MemRef/invalid.mlir
@@ -1169,3 +1169,19 @@ func.func @expand_shape_invalid_output_shape(
       into memref<2x15x20xf32, strided<[60000, 4000, 2], offset: 100>>
   return
 }
+
+// -----
+
+func.func @distinct_objects_types_mismatch(%arg0: memref<?xf32>, %arg1: memref<?xi32>) -> (memref<?xi32>, memref<?xf32>) {
+  // expected-error @+1 {{operand types and result types must match}}
+  %0, %1 = "memref.distinct_objects"(%arg0, %arg1) : (memref<?xf32>, memref<?xi32>) -> (memref<?xi32>, memref<?xf32>)
+  return %0, %1 : memref<?xi32>, memref<?xf32>
+}
+
+// -----
+
+func.func @distinct_objects_0_operands() {
+  // expected-error @+1 {{expected at least one operand}}
+  "memref.distinct_objects"() : () -> ()
+  return
+}
diff --git a/mlir/test/Dialect/MemRef/ops.mlir b/mlir/test/Dialect/MemRef/ops.mlir
index 6c2298a3f8acb..a90c9505a8405 100644
--- a/mlir/test/Dialect/MemRef/ops.mlir
+++ b/mlir/test/Dialect/MemRef/ops.mlir
@@ -302,6 +302,15 @@ func.func @assume_alignment(%0: memref<4x4xf16>) {
   return
 }
 
+// CHECK-LABEL: func @distinct_objects
+// CHECK-SAME: (%[[ARG0:.*]]: memref<?xf16>, %[[ARG1:.*]]: memref<?xf32>, %[[ARG2:.*]]: memref<?xf64>)
+func.func @distinct_objects(%arg0: memref<?xf16>, %arg1: memref<?xf32>, %arg2: memref<?xf64>) -> (memref<?xf16>, memref<?xf32>, memref<?xf64>) {
+  // CHECK:  %[[RES:.*]]:3 = memref.distinct_objects %[[ARG0]], %[[ARG1]], %[[ARG2]] : memref<?xf16>, memref<?xf32>, memref<?xf64>
+  %1, %2, %3 = memref.distinct_objects %arg0, %arg1, %arg2 : memref<?xf16>, memref<?xf32>, memref<?xf64>
+  // CHECK:  return %[[RES]]#0, %[[RES]]#1, %[[RES]]#2 : memref<?xf16>, memref<?xf32>, memref<?xf64>
+  return %1, %2, %3 : memref<?xf16>, memref<?xf32>, memref<?xf64>
+}
+
 // CHECK-LABEL: func @expand_collapse_shape_static
 func.func @expand_collapse_shape_static(
     %arg0: memref<3x4x5xf32>,

>From da859848444b614b07239b8dd5cd46599bf82470 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Wed, 1 Oct 2025 13:07:21 +0100
Subject: [PATCH 45/48] [lldb][CPlusPlusLanguage] Avoid redundant const char*
 -> StringRef roundtrip (#161499)

We've been seeing (very sporadic) lifetime issues around this area. Here's
an example backtrace:
```
[  8] 0x0000000188e56743 libsystem_platform.dylib`_sigtramp + 55
[  9] 0x00000001181e041f LLDB`lldb_private::CPlusPlusLanguage::SymbolNameFitsToLanguage(lldb_private::Mangled) const [inlined] unsigned long std::__1::constexpr_strlen[abi:nn200100]<char>(char const*) + 7 at constexpr_c_functions.h:63:10
[  9] 0x00000001181e0418 LLDB`lldb_private::CPlusPlusLanguage::SymbolNameFitsToLanguage(lldb_private::Mangled) const [inlined] std::__1::char_traits<char>::length[abi:nn200100](char const*) at char_traits.h:232:12
[  9] 0x00000001181e0418 LLDB`lldb_private::CPlusPlusLanguage::SymbolNameFitsToLanguage(lldb_private::Mangled) const [inlined] llvm::StringRef::StringRef(char const*) at StringRef.h:90:33
[  9] 0x00000001181e0418 LLDB`lldb_private::CPlusPlusLanguage::SymbolNameFitsToLanguage(lldb_private::Mangled) const [inlined] llvm::StringRef::StringRef(char const*) at StringRef.h:92:38
[  9] 0x00000001181e0418 LLDB`lldb_private::CPlusPlusLanguage::SymbolNameFitsToLanguage(lldb_private::Mangled) const + 20 at CPlusPlusLanguage.cpp:68:62
```

Looks like we're calling `strlen` on a nullptr. I stared at this
codepath for a while but am still not sure how that could happen unless
the underlying `ConstString` somehow pointed to corrupted data.

But `SymbolNameFitsToLanguage` does some roundtripping through a `const
char*` before calling `GetManglingScheme`. No other callsite does this
and it just seems redundant.

This patch cleans this up.

rdar://161128180
---
 .../Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp      | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
index 4e8a430af8c6c..a2199cb65cd35 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
@@ -104,10 +104,10 @@ CPlusPlusLanguage::GetFunctionNameInfo(ConstString name) const {
 }
 
 bool CPlusPlusLanguage::SymbolNameFitsToLanguage(Mangled mangled) const {
-  const char *mangled_name = mangled.GetMangledName().GetCString();
-  auto mangling_scheme = Mangled::GetManglingScheme(mangled_name);
-  return mangled_name && (mangling_scheme == Mangled::eManglingSchemeItanium ||
-                          mangling_scheme == Mangled::eManglingSchemeMSVC);
+  auto mangling_scheme =
+      Mangled::GetManglingScheme(mangled.GetMangledName().GetStringRef());
+  return mangling_scheme == Mangled::eManglingSchemeItanium ||
+         mangling_scheme == Mangled::eManglingSchemeMSVC;
 }
 
 ConstString CPlusPlusLanguage::GetDemangledFunctionNameWithoutArguments(

>From 4f3e84106e057fa5683d153c977dd0bbb72ca2b5 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Wed, 1 Oct 2025 05:09:18 -0700
Subject: [PATCH 46/48] [MLIR] Remove unused debug macros (NFC)

---
 mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
index 094ef0a45b8d2..e51cac4286f0c 100644
--- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
+++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
@@ -173,8 +173,6 @@ struct TestXeGPUUnrollingPatterns
 
 #undef DEBUG_TYPE
 #define DEBUG_TYPE "test-xegpu-layout-interface"
-#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 // Test pattern for distributing vector::StepOp from workgroup to subgroup.
 // Validates DistributeLayoutAttr interfaces for offset computation

>From f21e515244088ed8f65c799849997ee594cf53ee Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Wed, 1 Oct 2025 05:19:42 -0700
Subject: [PATCH 47/48] [NFC][LLVM][AsmWriter] Move type printing to
 `WriteAsOperandInternal` (#161456)

Add option to `WriteAsOperandInternal` to print the type and use that to
eliminate explicit type printing code in several places.
---
 llvm/lib/IR/AsmWriter.cpp | 88 +++++++++++++--------------------------
 1 file changed, 30 insertions(+), 58 deletions(-)

diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 54b92c9d35915..e29179b8f9955 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -1465,7 +1465,8 @@ struct AsmWriterContext {
 //===----------------------------------------------------------------------===//
 
 static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
-                                   AsmWriterContext &WriterCtx);
+                                   AsmWriterContext &WriterCtx,
+                                   bool PrintType = false);
 
 static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
                                    AsmWriterContext &WriterCtx,
@@ -1685,23 +1686,19 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     ListSeparator LS;
     for (unsigned i = 0, e = NumOpsToWrite; i != e; ++i) {
       Out << LS;
-      WriterCtx.TypePrinter->print(CPA->getOperand(i)->getType(), Out);
-      Out << ' ';
-      WriteAsOperandInternal(Out, CPA->getOperand(i), WriterCtx);
+      WriteAsOperandInternal(Out, CPA->getOperand(i), WriterCtx,
+                             /*PrintType=*/true);
     }
     Out << ')';
     return;
   }
 
   if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
-    Type *ETy = CA->getType()->getElementType();
     Out << '[';
     ListSeparator LS;
     for (const Value *Op : CA->operands()) {
       Out << LS;
-      WriterCtx.TypePrinter->print(ETy, Out);
-      Out << ' ';
-      WriteAsOperandInternal(Out, Op, WriterCtx);
+      WriteAsOperandInternal(Out, Op, WriterCtx, /*PrintType=*/true);
     }
     Out << ']';
     return;
@@ -1717,14 +1714,12 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
       return;
     }
 
-    Type *ETy = CA->getType()->getElementType();
     Out << '[';
     ListSeparator LS;
     for (uint64_t i = 0, e = CA->getNumElements(); i != e; ++i) {
       Out << LS;
-      WriterCtx.TypePrinter->print(ETy, Out);
-      Out << ' ';
-      WriteAsOperandInternal(Out, CA->getElementAsConstant(i), WriterCtx);
+      WriteAsOperandInternal(Out, CA->getElementAsConstant(i), WriterCtx,
+                             /*PrintType=*/true);
     }
     Out << ']';
     return;
@@ -1739,9 +1734,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
       ListSeparator LS;
       for (const Value *Op : CS->operands()) {
         Out << LS;
-        WriterCtx.TypePrinter->print(Op->getType(), Out);
-        Out << ' ';
-        WriteAsOperandInternal(Out, Op, WriterCtx);
+        WriteAsOperandInternal(Out, Op, WriterCtx, /*PrintType=*/true);
       }
       Out << ' ';
     }
@@ -1753,7 +1746,6 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
 
   if (isa<ConstantVector>(CV) || isa<ConstantDataVector>(CV)) {
     auto *CVVTy = cast<FixedVectorType>(CV->getType());
-    Type *ETy = CVVTy->getElementType();
 
     // Use the same shorthand for splat vector (i.e. "splat(Ty val)") as is
     // permitted on IR input to reduce the output changes when enabling
@@ -1763,9 +1755,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     if (auto *SplatVal = CV->getSplatValue()) {
       if (isa<ConstantInt>(SplatVal) || isa<ConstantFP>(SplatVal)) {
         Out << "splat (";
-        WriterCtx.TypePrinter->print(ETy, Out);
-        Out << ' ';
-        WriteAsOperandInternal(Out, SplatVal, WriterCtx);
+        WriteAsOperandInternal(Out, SplatVal, WriterCtx, /*PrintType=*/true);
         Out << ')';
         return;
       }
@@ -1775,9 +1765,8 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     ListSeparator LS;
     for (unsigned i = 0, e = CVVTy->getNumElements(); i != e; ++i) {
       Out << LS;
-      WriterCtx.TypePrinter->print(ETy, Out);
-      Out << ' ';
-      WriteAsOperandInternal(Out, CV->getAggregateElement(i), WriterCtx);
+      WriteAsOperandInternal(Out, CV->getAggregateElement(i), WriterCtx,
+                             /*PrintType=*/true);
     }
     Out << '>';
     return;
@@ -1813,9 +1802,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
       if (auto *SplatVal = CE->getSplatValue()) {
         if (isa<ConstantInt>(SplatVal) || isa<ConstantFP>(SplatVal)) {
           Out << "splat (";
-          WriterCtx.TypePrinter->print(SplatVal->getType(), Out);
-          Out << ' ';
-          WriteAsOperandInternal(Out, SplatVal, WriterCtx);
+          WriteAsOperandInternal(Out, SplatVal, WriterCtx, /*PrintType=*/true);
           Out << ')';
           return;
         }
@@ -1834,9 +1821,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     ListSeparator LS;
     for (const Value *Op : CE->operands()) {
       Out << LS;
-      WriterCtx.TypePrinter->print(Op->getType(), Out);
-      Out << ' ';
-      WriteAsOperandInternal(Out, Op, WriterCtx);
+      WriteAsOperandInternal(Out, Op, WriterCtx, /*PrintType=*/true);
     }
 
     if (CE->isCast()) {
@@ -1864,9 +1849,7 @@ static void writeMDTuple(raw_ostream &Out, const MDTuple *Node,
       Out << "null";
     } else if (auto *MDV = dyn_cast<ValueAsMetadata>(MD)) {
       Value *V = MDV->getValue();
-      WriterCtx.TypePrinter->print(V->getType(), Out);
-      Out << ' ';
-      WriteAsOperandInternal(Out, V, WriterCtx);
+      WriteAsOperandInternal(Out, V, WriterCtx, /*PrintType=*/true);
     } else {
       WriteAsOperandInternal(Out, MD, WriterCtx);
       WriterCtx.onWriteMetadataAsOperand(MD);
@@ -2634,7 +2617,7 @@ static void writeDIArgList(raw_ostream &Out, const DIArgList *N,
   Out << "!DIArgList(";
   ListSeparator FS;
   MDFieldPrinter Printer(Out, WriterCtx);
-  for (Metadata *Arg : N->getArgs()) {
+  for (const Metadata *Arg : N->getArgs()) {
     Out << FS;
     WriteAsOperandInternal(Out, Arg, WriterCtx, true);
   }
@@ -2700,7 +2683,13 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
 // Full implementation of printing a Value as an operand with support for
 // TypePrinting, etc.
 static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
-                                   AsmWriterContext &WriterCtx) {
+                                   AsmWriterContext &WriterCtx,
+                                   bool PrintType) {
+  if (PrintType) {
+    WriterCtx.TypePrinter->print(V->getType(), Out);
+    Out << ' ';
+  }
+
   if (V->hasName()) {
     PrintLLVMName(Out, V);
     return;
@@ -2825,9 +2814,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
   assert((FromValue || !isa<LocalAsMetadata>(V)) &&
          "Unexpected function-local metadata outside of value argument");
 
-  WriterCtx.TypePrinter->print(V->getValue()->getType(), Out);
-  Out << ' ';
-  WriteAsOperandInternal(Out, V->getValue(), WriterCtx);
+  WriteAsOperandInternal(Out, V->getValue(), WriterCtx, /*PrintType=*/true);
 }
 
 namespace {
@@ -2965,12 +2952,8 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
     Out << "<null operand!>";
     return;
   }
-  if (PrintType) {
-    TypePrinter.print(Operand->getType(), Out);
-    Out << ' ';
-  }
-  auto WriterCtx = getContext();
-  WriteAsOperandInternal(Out, Operand, WriterCtx);
+  auto WriteCtx = getContext();
+  WriteAsOperandInternal(Out, Operand, WriteCtx, PrintType);
 }
 
 void AssemblyWriter::writeSyncScope(const LLVMContext &Context,
@@ -3049,20 +3032,14 @@ void AssemblyWriter::writeOperandBundles(const CallBase *Call) {
 
     Out << '(';
 
-    bool FirstInput = true;
+    ListSeparator InnerLS;
     auto WriterCtx = getContext();
     for (const auto &Input : BU.Inputs) {
-      if (!FirstInput)
-        Out << ", ";
-      FirstInput = false;
-
+      Out << InnerLS;
       if (Input == nullptr)
         Out << "<null operand bundle!>";
-      else {
-        TypePrinter.print(Input->getType(), Out);
-        Out << " ";
-        WriteAsOperandInternal(Out, Input, WriterCtx);
-      }
+      else
+        WriteAsOperandInternal(Out, Input, WriterCtx, /*PrintType=*/true);
     }
 
     Out << ')';
@@ -5265,13 +5242,8 @@ static bool printWithoutType(const Value &V, raw_ostream &O,
 static void printAsOperandImpl(const Value &V, raw_ostream &O, bool PrintType,
                                ModuleSlotTracker &MST) {
   TypePrinting TypePrinter(MST.getModule());
-  if (PrintType) {
-    TypePrinter.print(V.getType(), O);
-    O << ' ';
-  }
-
   AsmWriterContext WriterCtx(&TypePrinter, MST.getMachine(), MST.getModule());
-  WriteAsOperandInternal(O, &V, WriterCtx);
+  WriteAsOperandInternal(O, &V, WriterCtx, PrintType);
 }
 
 void Value::printAsOperand(raw_ostream &O, bool PrintType,

>From ca0075f83c9392dc950045aab6663ef941be08fe Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 1 Oct 2025 13:28:37 +0100
Subject: [PATCH 48/48] [CodeGen] Remove
 `shouldExpandPartialReductionIntrinsic()` hook (NFC) (#161498)

This is unused. Targets can lower/expand the `PARTIAL_REDUCE_*` ISD
nodes.
---
 llvm/include/llvm/CodeGen/TargetLowering.h            | 7 -------
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4 ----
 2 files changed, 11 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index c45e03a7bdad8..7bbad172b2d42 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -480,13 +480,6 @@ class LLVM_ABI TargetLoweringBase {
     return true;
   }
 
-  /// Return true if the @llvm.vector.partial.reduce.* intrinsic
-  /// should be expanded using generic code in SelectionDAGBuilder.
-  virtual bool
-  shouldExpandPartialReductionIntrinsic(const IntrinsicInst *I) const {
-    return true;
-  }
-
   /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded
   /// using generic code in SelectionDAGBuilder.
   virtual bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index b5201a311c591..c21890a0d856f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8103,10 +8103,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     return;
   }
   case Intrinsic::vector_partial_reduce_add: {
-    if (!TLI.shouldExpandPartialReductionIntrinsic(cast<IntrinsicInst>(&I))) {
-      visitTargetIntrinsic(I, Intrinsic);
-      return;
-    }
     SDValue Acc = getValue(I.getOperand(0));
     SDValue Input = getValue(I.getOperand(1));
     setValue(&I,