[clang] [llvm] [HLSL][DXIL] InterlockedOr and InterlockedOr64 builtins (PR #180804)

Alexander Johnston via cfe-commits cfe-commits at lists.llvm.org
Tue Mar 3 16:57:29 PST 2026


https://github.com/Alexander-Johnston updated https://github.com/llvm/llvm-project/pull/180804

>From 687db9e025d2269743a7a654a65897e0ce2fec72 Mon Sep 17 00:00:00 2001
From: Alexander Johnston <alexander.johnston at amd.com>
Date: Tue, 10 Feb 2026 18:01:11 +0000
Subject: [PATCH 1/5] [HLSL][DXIL] InterlockedOr and InterlockedOr64 builtins

This includes the first phase of implementation of the InterlockedOr intrinsic.
This covers the usage of the intrinsic/builtin on RWByteAddressBuffers, Typed
Buffers, and Structured Buffers. Not covered are textures, groupshared memory,
and the standalone InterlockedOr(buf[index], val, ret) intrinsics.

SPIRV implementation is not covered in this commit.
---
 clang/include/clang/Basic/Builtins.td         |  24 +++
 .../clang/Basic/DiagnosticSemaKinds.td        |   3 +
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          | 128 +++++++++++++
 clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp |  51 +++++
 clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h   |   2 +
 clang/lib/Sema/HLSLExternalSemaSource.cpp     |   2 +
 clang/lib/Sema/SemaHLSL.cpp                   | 179 ++++++++++++++++++
 .../builtins/Interlocked-or-builtin.hlsl      |  76 ++++++++
 .../CodeGenHLSL/builtins/Interlocked-or.hlsl  |  98 ++++++++++
 .../BuiltIns/interlocked-or-errors.hlsl       |  84 ++++++++
 .../BuiltIns/interlocked-or64-errors.hlsl     |  74 ++++++++
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |   3 +
 llvm/lib/Target/DirectX/DXIL.td               |   8 +
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    |  60 ++++++
 llvm/test/CodeGen/DirectX/interlocked-or.ll   | 117 ++++++++++++
 llvm/test/CodeGen/DirectX/interlocked-or64.ll | 117 ++++++++++++
 16 files changed, 1026 insertions(+)
 create mode 100644 clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
 create mode 100644 clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/interlocked-or-errors.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/interlocked-or64-errors.hlsl
 create mode 100644 llvm/test/CodeGen/DirectX/interlocked-or.ll
 create mode 100644 llvm/test/CodeGen/DirectX/interlocked-or64.ll

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 05e3af4a0e96f..374ff6470d91e 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5379,6 +5379,30 @@ def HLSLDdyFine : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLInterlockedOr : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_interlocked_or"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def HLSLInterlockedOrRet : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_interlocked_or_ret"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def HLSLInterlockedOr64 : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_interlocked_or64"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def HLSLInterlockedOrRet64 : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_interlocked_or_ret64"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f999c362307af..384611a97dee3 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -13492,6 +13492,9 @@ def err_hlsl_assign_to_global_resource: Error<
 def err_hlsl_push_constant_unique
     : Error<"cannot have more than one push constant block">;
 
+def err_hlsl_intrinsic_in_wrong_shader_model: Error<
+  "intrinsic %0 requires shader model %1 or greater">;
+
 // Layout randomization diagnostics.
 def err_non_designated_init_used : Error<
   "a randomized struct can only be initialized with a designated initializer">;
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index c72eef1982e9e..39d716bea91bf 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -300,6 +300,122 @@ static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF,
   llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
 }
 
+// TODO: Move this enum to a shared header once more atomic intrinsics use it.
+// AtomicBinOp uses an i32 to determine the operation mode as follows
+enum AtomicOperationCode : uint32_t {
+  Add = 0,
+  And = 1,
+  Or = 2,
+  Xor = 3,
+  IMin = 4,
+  IMax = 5,
+  UMin = 6,
+  UMax = 7,
+  Exchange = 8
+};
+
+static Value *handleAtomicBinOp(CodeGenFunction &CGF, const CallExpr *E,
+                                const AtomicOperationCode OpCode,
+                                const bool HasReturn, const bool Is32Bit) {
+  Value *HandleOp = CGF.EmitScalarExpr(E->getArg(0));
+  Value *IndexOp = CGF.EmitScalarExpr(E->getArg(1));
+  Value *StructuredBufIndexOp = nullptr;
+  Value *NewValueOp = nullptr;
+  Value *OldValueOp = nullptr;
+  unsigned OldValueArgIdx = 0;
+  if (E->getNumArgs() == 3) {
+    // (handle, index, newValue)
+    NewValueOp = CGF.EmitScalarExpr(E->getArg(2));
+  } else if (E->getNumArgs() == 4) {
+    if (HasReturn) {
+      // (handle, index, newValue, oldValue)
+      NewValueOp = CGF.EmitScalarExpr(E->getArg(2));
+      OldValueArgIdx = 3;
+    } else {
+      // (handle, index, index, newValue)
+      StructuredBufIndexOp = CGF.EmitScalarExpr(E->getArg(2));
+      NewValueOp = CGF.EmitScalarExpr(E->getArg(3));
+    }
+  } else {
+    // (handle, index, index, newValue, oldValue)
+    StructuredBufIndexOp = CGF.EmitScalarExpr(E->getArg(2));
+    NewValueOp = CGF.EmitScalarExpr(E->getArg(3));
+    OldValueArgIdx = 4;
+  }
+
+  switch (CGF.CGM.getTarget().getTriple().getArch()) {
+  case llvm::Triple::dxil: {
+    QualType HandleTy = E->getArg(0)->getType();
+    const HLSLAttributedResourceType *ResourceTy =
+        HandleTy->getAs<HLSLAttributedResourceType>();
+
+    // AtomicBinOp uses an i32 to determine the operation mode as follows
+    // Add: 0, And: 1, Or: 2, Xor: 3, IMin: 4, IMax: 5, UMin: 6, UMax: 7,
+    // Exchange: 8
+    Value *ModeConstant = ConstantInt::get(CGF.Int32Ty, OpCode);
+
+    // AtomicBinOp has 3 coordinate params which must be handled differently
+    // depending on the resource type being accessed.
+    // Initially undef all the coordinates then fill as required
+    Value *Undef = UndefValue::get(CGF.Int32Ty);
+    Value *C0 = Undef;
+    Value *C1 = Undef;
+    Value *C2 = Undef;
+    if (!ResourceTy->getAttrs().RawBuffer) {
+      assert(
+          (ResourceTy->getContainedType() == CGF.getContext().IntTy ||
+           ResourceTy->getContainedType() == CGF.getContext().UnsignedIntTy) &&
+          "AtomicBinOp RWBuffer must contain int or uint");
+      // RWBuffer: c0
+      C0 = IndexOp;
+
+      // RWByteAddressBuffers are output as char8_t, but as that isn't
+      // recognised by HLSL we can't use it as an attribute to define them in
+      // tests, so must also check for char ([[hlsl::contained_type(char)]])
+    } else if (ResourceTy->getContainedType() == CGF.getContext().Char8Ty ||
+               ResourceTy->getContainedType() == CGF.getContext().CharTy) {
+      // RWByteAddressBuffer: c0
+      C0 = IndexOp;
+    } else {
+      // RWStructuredBuffer: c0 and c1
+      C0 = IndexOp;
+      C1 = StructuredBufIndexOp;
+    }
+    assert(C0 != Undef && "Failed to identify coordinates for Interlocked");
+    // TODO: Add coordinate logic for texture and groupshared
+
+    // atomicBinOp
+    // opcode, handle, binary operation code, coordinates c0, c1, c2, new val
+    if (Is32Bit) {
+      Intrinsic::ID ID = Intrinsic::dx_resource_atomicbinop;
+      OldValueOp = CGF.Builder.CreateIntrinsic(
+          /*ReturnType=*/CGF.Int32Ty, ID,
+          ArrayRef<Value *>{HandleOp, ModeConstant, C0, C1, C2, NewValueOp},
+          nullptr, "hlsl.interlocked.or");
+    } else {
+      Intrinsic::ID ID = Intrinsic::dx_resource_atomicbinop64;
+      OldValueOp = CGF.Builder.CreateIntrinsic(
+          /*ReturnType=*/CGF.Int64Ty, ID,
+          ArrayRef<Value *>{HandleOp, ModeConstant, C0, C1, C2, NewValueOp},
+          nullptr, "hlsl.interlocked.or");
+    }
+    break;
+  }
+  default:
+    llvm_unreachable(
+        "Interlocked intrinsic not supported by target architecture");
+  }
+
+  // Destination may or may not be provided
+  // If it is provided create a store to it
+  if (HasReturn) {
+    LValue DestOp = CGF.EmitLValue(E->getArg(OldValueArgIdx));
+    return CGF.Builder.CreateStore(OldValueOp, DestOp.getAddress());
+  } else {
+    return OldValueOp;
+  }
+}
+
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
                                LValue &Stride) {
   // Figure out the stride of the buffer elements from the handle type.
@@ -1181,6 +1297,18 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
     llvm::Value *Args[] = {SpecId, DefaultVal};
     return Builder.CreateCall(SpecConstantFn, Args);
   }
+  case Builtin::BI__builtin_hlsl_interlocked_or: {
+    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, false, true);
+  }
+  case Builtin::BI__builtin_hlsl_interlocked_or64: {
+    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, false, false);
+  }
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret: {
+    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, true, true);
+  }
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret64: {
+    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, true, false);
+  }
   }
   return nullptr;
 }
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
index 1dd7fd6fac455..25c913625b951 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
@@ -22,6 +22,7 @@
 #include "clang/AST/Type.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TargetInfo.h"
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/SemaHLSL.h"
@@ -1605,5 +1606,55 @@ BuiltinTypeDeclBuilder::addGetDimensionsMethodForBuffer() {
       .finalize();
 }
 
+BuiltinTypeDeclBuilder &
+BuiltinTypeDeclBuilder::addInterlockedMethodsForBuffer() {
+  using PH = BuiltinTypeMethodBuilder::PlaceHolder;
+  ASTContext &AST = SemaRef.getASTContext();
+  QualType UIntTy = AST.UnsignedIntTy;
+
+  BuiltinTypeMethodBuilder(*this, "InterlockedOr", AST.VoidTy)
+      .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("value", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .callBuiltin("__builtin_hlsl_interlocked_or", QualType(), PH::Handle,
+                   PH::_0, PH::_1)
+      .finalize();
+
+  return BuiltinTypeMethodBuilder(*this, "InterlockedOr", AST.VoidTy)
+      .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("value", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("original_value", UIntTy, HLSLParamModifierAttr::Keyword_out)
+      .callBuiltin("__builtin_hlsl_interlocked_or_ret", UIntTy, PH::Handle,
+                   PH::_0, PH::_1, PH::_2)
+      .finalize();
+}
+
+BuiltinTypeDeclBuilder &
+BuiltinTypeDeclBuilder::addInterlocked64MethodsForBuffer() {
+  ASTContext &AST = SemaRef.getASTContext();
+  VersionTuple TargetVersion = AST.getTargetInfo().getTriple().getOSVersion();
+  bool IsDXIL = AST.getTargetInfo().getTriple().getArch() == llvm::Triple::dxil;
+  if (TargetVersion < VersionTuple(6, 6) && IsDXIL)
+    return *this;
+
+  using PH = BuiltinTypeMethodBuilder::PlaceHolder;
+  QualType UIntTy = AST.UnsignedIntTy;
+  QualType ULongTy = AST.UnsignedLongTy;
+
+  BuiltinTypeMethodBuilder(*this, "InterlockedOr64", AST.VoidTy)
+      .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("value", ULongTy, HLSLParamModifierAttr::Keyword_in)
+      .callBuiltin("__builtin_hlsl_interlocked_or64", QualType(), PH::Handle,
+                   PH::_0, PH::_1)
+      .finalize();
+
+  return BuiltinTypeMethodBuilder(*this, "InterlockedOr64", AST.VoidTy)
+      .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("value", ULongTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("original_value", ULongTy, HLSLParamModifierAttr::Keyword_out)
+      .callBuiltin("__builtin_hlsl_interlocked_or_ret64", ULongTy, PH::Handle,
+                   PH::_0, PH::_1, PH::_2)
+      .finalize();
+}
+
 } // namespace hlsl
 } // namespace clang
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
index aa6967e1eb725..29d5eff91a0bc 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h
@@ -109,6 +109,8 @@ class BuiltinTypeDeclBuilder {
   BuiltinTypeDeclBuilder &addConsumeMethod();
 
   BuiltinTypeDeclBuilder &addGetDimensionsMethodForBuffer();
+  BuiltinTypeDeclBuilder &addInterlockedMethodsForBuffer();
+  BuiltinTypeDeclBuilder &addInterlocked64MethodsForBuffer();
 
 private:
   BuiltinTypeDeclBuilder &addCreateFromBinding();
diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp
index f7862b3a3f594..8836e2f0ab2cb 100644
--- a/clang/lib/Sema/HLSLExternalSemaSource.cpp
+++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp
@@ -515,6 +515,8 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
         .addByteAddressBufferLoadMethods()
         .addByteAddressBufferStoreMethods()
         .addGetDimensionsMethodForBuffer()
+        .addInterlockedMethodsForBuffer()
+        .addInterlocked64MethodsForBuffer()
         .completeDefinition();
   });
   Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace,
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 3e99a8f7d89d1..c3ed6a59e80c0 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3325,6 +3325,21 @@ static bool CheckVectorElementCount(Sema *S, QualType PassedType,
   return false;
 }
 
+static bool CheckShaderModelVersion(Sema *S, CallExpr *TheCall,
+                                    VersionTuple MinimumSMVersion) {
+  bool IsDXIL = S->getASTContext().getTargetInfo().getTriple().getArch() ==
+                llvm::Triple::dxil;
+  llvm::VersionTuple SMVersion =
+      S->getASTContext().getTargetInfo().getTriple().getOSVersion();
+  if (SMVersion < MinimumSMVersion && IsDXIL) {
+    S->Diag(TheCall->getBeginLoc(),
+            diag::err_hlsl_intrinsic_in_wrong_shader_model)
+        << TheCall->getDirectCallee() << MinimumSMVersion.getAsString();
+    return true;
+  }
+  return false;
+}
+
 // Note: returning true in this case results in CheckBuiltinFunctionCall
 // returning an ExprError
 bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
@@ -3924,6 +3939,170 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
                                getASTContext().UnsignedIntTy);
     break;
   }
+  case Builtin::BI__builtin_hlsl_interlocked_or: {
+    if (SemaRef.checkArgCountRange(TheCall, 3, 4))
+      return true;
+    auto checkResTy = [this](const HLSLAttributedResourceType *ResTy) -> bool {
+      bool IsValid = false;
+      const ASTContext &AST = SemaRef.getASTContext();
+      // The resource handle must be either
+      // RWByteAddressBuffer or RWStructuredBuffer
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 ResTy->isRaw() && ResTy->hasContainedType();
+      // RWBuffer<int> or RWBuffer<uint>
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 !ResTy->isRaw() && ResTy->hasContainedType() &&
+                 (ResTy->getContainedType() == AST.IntTy ||
+                  ResTy->getContainedType() == AST.UnsignedIntTy);
+      // RWTexture<int> or RWTexture<uint> (any dimension)
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 !ResTy->isRaw() &&
+                 ResTy->getAttrs().ResourceDimension !=
+                     llvm::dxil::ResourceDimension::Unknown &&
+                 (ResTy->getContainedType() == AST.IntTy ||
+                  ResTy->getContainedType() == AST.UnsignedIntTy);
+      return !IsValid;
+    };
+    if (CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy))
+      return true;
+
+    if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1),
+                            SemaRef.getASTContext().UnsignedIntTy) ||
+        CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
+                            SemaRef.getASTContext().UnsignedIntTy))
+      return true;
+    // We will have a second index if handling a RWStructuredBuffer
+    if (TheCall->getNumArgs() == 4)
+      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(3),
+                              SemaRef.getASTContext().UnsignedIntTy))
+        return true;
+
+    TheCall->setType(SemaRef.getASTContext().VoidTy);
+    break;
+  }
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret: {
+    if (SemaRef.checkArgCountRange(TheCall, 4, 5))
+      return true;
+    auto checkResTy = [this](const HLSLAttributedResourceType *ResTy) -> bool {
+      bool IsValid = false;
+      const ASTContext &AST = SemaRef.getASTContext();
+      // The resource handle must be either
+      // RWByteAddressBuffer or RWStructuredBuffer
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 ResTy->getAttrs().RawBuffer && ResTy->hasContainedType();
+      // RWBuffer<int> or RWBuffer<uint>
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 !ResTy->getAttrs().RawBuffer && ResTy->hasContainedType() &&
+                 (ResTy->getContainedType() == AST.IntTy ||
+                  ResTy->getContainedType() == AST.UnsignedIntTy);
+      // TODO: Handle Texture types when implemented
+      return !IsValid;
+    };
+    if (CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy))
+      return true;
+
+    if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1),
+                            SemaRef.getASTContext().UnsignedIntTy) ||
+        CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
+                            SemaRef.getASTContext().UnsignedIntTy) ||
+        CheckArgTypeMatches(&SemaRef, TheCall->getArg(3),
+                            SemaRef.getASTContext().UnsignedIntTy))
+      return true;
+    // We will have a second index if handling a RWStructuredBuffer
+    if (TheCall->getNumArgs() == 5)
+      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(4),
+                              SemaRef.getASTContext().UnsignedIntTy))
+        return true;
+
+    TheCall->setType(SemaRef.getASTContext().UnsignedIntTy);
+    break;
+  }
+  case Builtin::BI__builtin_hlsl_interlocked_or64: {
+    if (SemaRef.checkArgCountRange(TheCall, 3, 4))
+      return true;
+    if (CheckShaderModelVersion(&SemaRef, TheCall, VersionTuple(6, 6)))
+      return true;
+    auto checkResTy = [this](const HLSLAttributedResourceType *ResTy) -> bool {
+      bool IsValid = false;
+      const ASTContext &AST = SemaRef.getASTContext();
+      // The resource handle must be either
+      // RWByteAddressBuffer or RWStructuredBuffer
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 ResTy->getAttrs().RawBuffer && ResTy->hasContainedType();
+      // RWBuffer<int> or RWBuffer<uint>
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 !ResTy->getAttrs().RawBuffer && ResTy->hasContainedType() &&
+                 (ResTy->getContainedType() == AST.LongTy ||
+                  ResTy->getContainedType() == AST.UnsignedLongTy);
+      // TODO: Handle Texture types when implemented
+      return !IsValid;
+    };
+    if (CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy))
+      return true;
+
+    if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1),
+                            SemaRef.getASTContext().UnsignedIntTy))
+      return true;
+    // We will have a second index if handling a RWStructuredBuffer
+    if (TheCall->getNumArgs() == 4) {
+      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
+                              SemaRef.getASTContext().UnsignedIntTy) ||
+          CheckArgTypeMatches(&SemaRef, TheCall->getArg(3),
+                              SemaRef.getASTContext().UnsignedLongTy))
+        return true;
+    } else {
+      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
+                              SemaRef.getASTContext().UnsignedLongTy))
+        return true;
+    }
+
+    TheCall->setType(SemaRef.getASTContext().VoidTy);
+    break;
+  }
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret64: {
+    if (SemaRef.checkArgCountRange(TheCall, 4, 5))
+      return true;
+    if (CheckShaderModelVersion(&SemaRef, TheCall, VersionTuple(6, 6)))
+      return true;
+    auto checkResTy = [this](const HLSLAttributedResourceType *ResTy) -> bool {
+      bool IsValid = false;
+      const ASTContext &AST = SemaRef.getASTContext();
+      // The resource handle must be either
+      // RWByteAddressBuffer or RWStructuredBuffer
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 ResTy->getAttrs().RawBuffer && ResTy->hasContainedType();
+      // RWBuffer<int> or RWBuffer<uint>
+      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+                 !ResTy->getAttrs().RawBuffer && ResTy->hasContainedType() &&
+                 (ResTy->getContainedType() == AST.LongTy ||
+                  ResTy->getContainedType() == AST.UnsignedLongTy);
+      // TODO: Handle Texture types when implemented
+      return !IsValid;
+    };
+    if (CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy))
+      return true;
+
+    if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1),
+                            SemaRef.getASTContext().UnsignedIntTy) ||
+        CheckArgTypeMatches(&SemaRef, TheCall->getArg(3),
+                            SemaRef.getASTContext().UnsignedLongTy))
+      return true;
+    // We will have a second index if handling a RWStructuredBuffer
+    if (TheCall->getNumArgs() == 5) {
+      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
+                              SemaRef.getASTContext().UnsignedIntTy) ||
+          CheckArgTypeMatches(&SemaRef, TheCall->getArg(4),
+                              SemaRef.getASTContext().UnsignedLongTy))
+        return true;
+    } else {
+      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
+                              SemaRef.getASTContext().UnsignedLongTy))
+        return true;
+    }
+
+    TheCall->setType(SemaRef.getASTContext().UnsignedLongTy);
+    break;
+  }
   }
   return false;
 }
diff --git a/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
new file mode 100644
index 0000000000000..fa2a75f2309e9
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.6-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DBYTEADDRESS | FileCheck %s --check-prefixes=CHECK-BYTEADDRESS
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.6-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DINTBUF | FileCheck %s --check-prefixes=CHECK-INTBUF
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.6-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DUINTBUF | FileCheck %s --check-prefixes=CHECK-UINTBUF
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.6-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DSTRUCTURED | FileCheck %s --check-prefixes=CHECK-STRUCTURED
+
+#ifdef BYTEADDRESS
+using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]] [[hlsl::contained_type(char)]];
+#endif
+#ifdef INTBUF
+using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(int)]];
+#endif
+#ifdef UINTBUF
+using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(unsigned int)]];
+#endif
+#ifdef STRUCTURED
+struct TestStruct {
+  int a;
+  unsigned int b;
+};
+
+using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]] [[hlsl::contained_type(TestStruct)]];
+#endif
+
+struct CustomResource {
+  handle_t h;
+};
+
+#ifndef STRUCTURED
+
+// CHECK-LABEL: define {{.*}} i32 @_Z11test_return14CustomResource(
+// CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-INTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-UINTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer", i32, 1, 0, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-NEXT: store i32 %hlsl.interlocked.or, ptr [[returnVal:%.*]], align 4
+// CHECK-NEXT: [[loadedReturnVal:%.*]] = load i32, ptr [[returnVal]], align 4
+// CHECK-NEXT: ret i32 [[loadedReturnVal]]
+unsigned int test_return(CustomResource cr) {
+  unsigned int returnVal = 0u;
+  __builtin_hlsl_interlocked_or_ret(cr.h, 1u, 0u, returnVal);
+  return returnVal;
+}
+
+// CHECK-LABEL: define {{.*}} void @_Z14test_no_return14CustomResource(
+// CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-INTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-UINTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer", i32, 1, 0, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-NEXT: ret void
+void test_no_return(CustomResource h) {
+  __builtin_hlsl_interlocked_or(h.h, 1u, 0u);
+}
+
+#else
+
+// CHECK-STRUCTURED-LABEL: define {{.*}} i32 @_Z11test_return14CustomResource(
+// CHECK-STRUCTURED: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %0, i32 2, i32 1, i32 4, i32 undef, i32 0)
+// CHECK-STRUCTURED-NEXT: store i32 %hlsl.interlocked.or, ptr [[returnVal:%.*]], align 4
+// CHECK-STRUCTURED-NEXT: [[loadedReturnVal:%.*]] = load i32, ptr [[returnVal]], align 4
+// CHECK-STRUCTURED-NEXT: ret i32 [[loadedReturnVal]]
+unsigned int test_return(CustomResource cr) {
+  unsigned int returnVal = 0u;
+  __builtin_hlsl_interlocked_or_ret(cr.h, 1u, 4u, 0u, returnVal);
+  return returnVal;
+}
+
+// CHECK-STRUCTURED-LABEL: define {{.*}} void @_Z14test_no_return14CustomResource(
+// CHECK-STRUCTURED: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %0, i32 2, i32 1, i32 4, i32 undef, i32 0)
+// CHECK-STRUCTURED-NEXT: ret void
+void test_no_return(CustomResource h) {
+  __builtin_hlsl_interlocked_or(h.h, 1u, 4u, 0u);
+}
+
+#endif
diff --git a/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl b/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
new file mode 100644
index 0000000000000..85caf9aeb4120
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
@@ -0,0 +1,98 @@
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.0-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DINTERLOCKED32 | \
+// RUN:  FileCheck %s --check-prefixes=CHECK-32
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.6-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DINTERLOCKED64 | \
+// RUN:  FileCheck %s --check-prefixes=CHECK-64
+
+RWByteAddressBuffer buf: register(u0);
+
+// CHECK: %"class.hlsl::RWByteAddressBuffer" = type { target("dx.RawBuffer", i8, 1, 0) }
+
+#ifdef INTERLOCKED32
+
+// CHECK-32-LABEL: define {{.*}} @_Z11test_return
+// CHECK-32: call void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjjRj
+// CHECK-32: ret i32 {{%.*}}
+uint test_return() {
+  uint returnVal;
+  buf.InterlockedOr(0, 0, returnVal);
+  return returnVal;
+}
+
+// CHECK-32-LABEL: define {{.*}} @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjjRj(
+// CHECK-32: [[this_addr:%.*]] = alloca ptr
+// CHECK-32: [[original_val:%.*]] = alloca ptr
+// CHECK-32: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK-32: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK-32: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
+// CHECK-32: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK-32: [[newval:%.*]] = load i32, ptr %value.addr
+// CHECK-32: [[result:%.*]] = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i32 [[newval]])
+// CHECK-32: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
+// CHECK-32: store i32 [[result]], ptr [[loaded_orig_val_ptr]]
+
+// CHECK-32-LABEL: define {{.*}} @_Z14test_no_return
+// CHECK-32: call void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjj
+// CHECK-32: ret void
+void test_no_return() {
+  buf.InterlockedOr(0, 0);
+}
+
+// CHECK-32-LABEL: define {{.*}} void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjj(
+// CHECK-32: [[this_addr:%.*]] = alloca ptr
+// CHECK-32: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK-32: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK-32: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
+// CHECK-32: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK-32: [[newval:%.*]] = load i32, ptr %value.addr
+// CHECK-32: {{%.*}} = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i32 [[newval]])
+// CHECK-32: ret void
+
+// CHECK-32: declare i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0), i32, i32, i32, i32, i32)
+
+#endif
+
+#ifdef INTERLOCKED64
+
+// CHECK-LABEL: define {{.*}} @_Z13test_return64
+// CHECK: call void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64EjmRm
+// CHECK: ret i64 {{%.*}}
+uint64_t test_return64() {
+  uint64_t returnVal;
+  buf.InterlockedOr64(0, 0, returnVal);
+  return returnVal;
+}
+
+// CHECK-64-LABEL: define {{.*}} void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64EjmRm(
+// CHECK-64: [[this_addr:%.*]] = alloca ptr
+// CHECK-64: [[original_val:%.*]] = alloca ptr
+// CHECK-64: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK-64: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK-64: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
+// CHECK-64: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK-64: [[newval:%.*]] = load i64, ptr %value.addr
+// CHECK-64: [[result:%.*]] = call i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i64 [[newval]])
+// CHECK-64: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
+// CHECK-64: store i64 [[result]], ptr [[loaded_orig_val_ptr]]
+
+// CHECK-LABEL: define {{.*}} @_Z16test_no_return64
+// CHECK: call void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64Ejm
+// CHECK: ret void
+void test_no_return64() {
+  buf.InterlockedOr64(0, 0);
+}
+
+// CHECK-64-LABEL: define {{.*}} void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64Ejm(
+// CHECK-64: [[this_addr:%.*]] = alloca ptr
+// CHECK-64: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK-64: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK-64: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
+// CHECK-64: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK-64: [[newval:%.*]] = load i64, ptr %value.addr
+// CHECK-64: {{.*}} = call i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i64 [[newval]])
+// CHECK-64: ret void
+
+// CHECK-64: declare i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0), i32, i32, i32, i32, i64)
+
+#endif
diff --git a/clang/test/SemaHLSL/BuiltIns/interlocked-or-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/interlocked-or-errors.hlsl
new file mode 100644
index 0000000000000..2c9da9b840297
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/interlocked-or-errors.hlsl
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library %s -verify
+
+void no_arg() {
+  __builtin_hlsl_interlocked_or();
+  // expected-error at -1 {{too few arguments to function call, expected 3, have 0}}
+}
+
+void too_many_args() {
+  __builtin_hlsl_interlocked_or(0, 0, 0, 0, 0);
+  // expected-error at -1 {{too many arguments to function call, expected at most 4, have 5}}
+}
+
+void non_resource_arg() {
+  __builtin_hlsl_interlocked_or(0, 0, 0);
+  // expected-error at -1 {{used type 'int' where __hlsl_resource_t is required}}
+}
+
+void ret_no_arg() {
+  __builtin_hlsl_interlocked_or_ret();
+  // expected-error at -1 {{too few arguments to function call, expected 4, have 0}}
+}
+
+void ret_too_many_args() {
+  __builtin_hlsl_interlocked_or_ret(0, 0, 0, 0, 0, 0);
+  // expected-error at -1 {{too many arguments to function call, expected at most 5, have 6}}
+}
+
+void ret_non_resource_arg() {
+  __builtin_hlsl_interlocked_or_ret(0, 0, 0, 0);
+  // expected-error at -1 {{used type 'int' where __hlsl_resource_t is required}}
+}
+
+// ByteAddressBuffer
+using handle_char_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::raw_buffer]] [[hlsl::contained_type(char)]];
+// Buffer<int>
+using handle_int_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::contained_type(int)]];
+// RWBuffer<float>
+using handle_float_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float)]];
+
+struct CustomResource {
+  handle_char_t ByteAddressBufferChar;
+  handle_int_t BufferInt;
+  handle_float_t RWBufferFloat;
+};
+
+void invalid_byte_address_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or(CR.ByteAddressBufferChar, 0, 0);
+  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void invalid_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or(CR.BufferInt, 0, 0);
+  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void invalid_rw_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or(CR.RWBufferFloat, 0, 0);
+  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void ret_invalid_byte_address_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or_ret(CR.ByteAddressBufferChar, 0, 0, 0);
+  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void ret_invalid_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or_ret(CR.BufferInt, 0, 0, 0);
+  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void ret_invalid_rw_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or_ret(CR.RWBufferFloat, 0, 0, 0);
+  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void wrong_shader_model() {
+  __builtin_hlsl_interlocked_or64(0, 0, 0, 0);
+  // expected-error at -1 {{intrinsic '__builtin_hlsl_interlocked_or64(0, 0, 0, 0)' requires shader model 6.6 or greater}}
+}
+
+void ret_wrong_shader_model() {
+  __builtin_hlsl_interlocked_or_ret64(0, 0, 0, 0);
+  // expected-error at -1 {{intrinsic '__builtin_hlsl_interlocked_or_ret64(0, 0, 0, 0)' requires shader model 6.6 or greater}}
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/interlocked-or64-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/interlocked-or64-errors.hlsl
new file mode 100644
index 0000000000000..5a2a0ea2b23af
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/interlocked-or64-errors.hlsl
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -verify
+
+void no_arg() {
+  __builtin_hlsl_interlocked_or64();
+  // expected-error at -1 {{too few arguments to function call, expected 3, have 0}}
+}
+
+void too_many_args() {
+  __builtin_hlsl_interlocked_or64(0, 0, 0, 0, 0);
+  // expected-error at -1 {{too many arguments to function call, expected at most 4, have 5}}
+}
+
+void non_resource_arg() {
+  __builtin_hlsl_interlocked_or64(0, 0, 0);
+  // expected-error at -1 {{used type 'int' where __hlsl_resource_t is required}}
+}
+
+void ret_no_arg() {
+  __builtin_hlsl_interlocked_or_ret64();
+  // expected-error at -1 {{too few arguments to function call, expected 4, have 0}}
+}
+
+void ret_too_many_args() {
+  __builtin_hlsl_interlocked_or_ret64(0, 0, 0, 0, 0, 0);
+  // expected-error at -1 {{too many arguments to function call, expected at most 5, have 6}}
+}
+
+void ret_non_resource_arg() {
+  __builtin_hlsl_interlocked_or_ret64(0, 0, 0, 0);
+  // expected-error at -1 {{used type 'int' where __hlsl_resource_t is required}}
+}
+
+// ByteAddressBuffer
+using handle_char_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::raw_buffer]] [[hlsl::contained_type(char)]];
+// Buffer<int>
+using handle_int_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::contained_type(int)]];
+// RWBuffer<float>
+using handle_float_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float)]];
+
+struct CustomResource {
+  handle_char_t ByteAddressBufferChar;
+  handle_int_t BufferInt;
+  handle_float_t RWBufferFloat;
+};
+
+void invalid_byte_address_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or64(CR.ByteAddressBufferChar, 0, 0);
+  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void invalid_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or64(CR.BufferInt, 0, 0);
+  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void invalid_rw_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or64(CR.RWBufferFloat, 0, 0);
+  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void ret_invalid_byte_address_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or_ret64(CR.ByteAddressBufferChar, 0, 0, 0);
+  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void ret_invalid_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or_ret64(CR.BufferInt, 0, 0, 0);
+  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+}
+
+void ret_invalid_rw_typed_buffer(CustomResource CR) {
+  __builtin_hlsl_interlocked_or_ret64(CR.RWBufferFloat, 0, 0, 0);
+  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 88732bfa5a892..27b9aafed5218 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -94,6 +94,9 @@ def int_dx_resource_sample_clamp
 // Cast between target extension handle types and dxil-style opaque handles
 def int_dx_resource_casthandle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
 
+def int_dx_resource_atomicbinop: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWriteMem]>;
+def int_dx_resource_atomicbinop64: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [IntrWriteMem]>;
+
 def int_dx_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>;
 def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>;
 def int_dx_asdouble : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [llvm_anyint_ty, LLVMMatchType<0>], [IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 8751484496395..a88bade555517 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -910,6 +910,14 @@ def GetDimensions : DXILOp<72, getDimensions> {
   let stages = [Stages<DXIL1_0, [all_stages]>];
 }
 
+def AtomicBinOp : DXILOp<78, atomicBinOp> {
+  let Doc = "performs an atomic operation on a value in memory";
+  let arguments = [HandleTy, Int32Ty, Int32Ty, Int32Ty, Int32Ty, OverloadTy];
+  let result = OverloadTy;
+  let overloads = [Overloads<DXIL1_0, [Int32Ty, Int64Ty]>];
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
 def Barrier : DXILOp<80, barrier> {
   let Doc = "inserts a memory barrier in the shader";
   let intrinsics = [
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 0c0830cc92aa7..55117340d75da 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -920,6 +920,60 @@ class OpLowerer {
     });
   }
 
+  [[nodiscard]] bool lowerAtomicBinOp(Function &F) {
+    IRBuilder<> &IRB = OpBuilder.getIRB();
+    Type *RetTy = IRB.getInt32Ty();
+
+    return replaceFunction(F, [&](CallInst *CI) -> Error {
+      IRB.SetInsertPoint(CI);
+      Value *Handle =
+          createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
+      Value *OperationMode = CI->getArgOperand(1);
+      Value *Index1 = CI->getArgOperand(2);
+      Value *Index2 = CI->getArgOperand(3);
+      Value *Index3 = CI->getArgOperand(4);
+      Value *NewVal = CI->getArgOperand(5);
+      SmallVector<Value *> Args{Handle, OperationMode, Index1,
+                                Index2, Index3,        NewVal};
+
+      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
+          dxil::OpCode::AtomicBinOp, Args, CI->getName(), RetTy);
+      if (Error E = OpCall.takeError())
+        return E;
+
+      CI->replaceAllUsesWith(*OpCall);
+      CI->eraseFromParent();
+      return Error::success();
+    });
+  }
+
+  [[nodiscard]] bool lowerAtomicBinOp64(Function &F) {
+    IRBuilder<> &IRB = OpBuilder.getIRB();
+    Type *RetTy = IRB.getInt64Ty();
+
+    return replaceFunction(F, [&](CallInst *CI) -> Error {
+      IRB.SetInsertPoint(CI);
+      Value *Handle =
+          createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
+      Value *OperationMode = CI->getArgOperand(1);
+      Value *Index1 = CI->getArgOperand(2);
+      Value *Index2 = CI->getArgOperand(3);
+      Value *Index3 = CI->getArgOperand(4);
+      Value *NewVal = CI->getArgOperand(5);
+      SmallVector<Value *> Args{Handle, OperationMode, Index1,
+                                Index2, Index3,        NewVal};
+
+      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
+          dxil::OpCode::AtomicBinOp, Args, CI->getName(), RetTy);
+      if (Error E = OpCall.takeError())
+        return E;
+
+      CI->replaceAllUsesWith(*OpCall);
+      CI->eraseFromParent();
+      return Error::success();
+    });
+  }
+
   bool lowerIntrinsics() {
     bool Updated = false;
     bool HasErrors = false;
@@ -1006,6 +1060,12 @@ class OpLowerer {
       case Intrinsic::is_fpclass:
         HasErrors |= lowerIsFPClass(F);
         break;
+      case Intrinsic::dx_resource_atomicbinop:
+        HasErrors |= lowerAtomicBinOp(F);
+        break;
+      case Intrinsic::dx_resource_atomicbinop64:
+        HasErrors |= lowerAtomicBinOp64(F);
+        break;
       }
       Updated = true;
     }
diff --git a/llvm/test/CodeGen/DirectX/interlocked-or.ll b/llvm/test/CodeGen/DirectX/interlocked-or.ll
new file mode 100644
index 0000000000000..f53a0a296ebda
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/interlocked-or.ll
@@ -0,0 +1,117 @@
+; RUN: opt -S -dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define i32 @_Z20byteaddr_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i32, align 4
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
+  store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
+  ; CHECK: [[RETLOAD:%.*]] = load i32, ptr [[RETURN]]
+  %0 = load i32, ptr %returnVal, align 4
+  ; CHECK: ret i32 [[RETLOAD]]
+  ret i32 %0
+}
+
+define void @_Z23byteaddr_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: ret void
+  ret void
+}
+
+%struct.TestStruct = type { i32, i32 }
+
+define i32 @_Z18struct_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i32, align 4
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 4, i32 undef, i32 0)
+  ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
+  store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
+  ; CHECK: [[RETLOAD:%.*]] = load i32, ptr [[RETURN]]
+  %0 = load i32, ptr %returnVal, align 4
+  ; CHECK: ret i32 [[RETLOAD]]
+  ret i32 %0
+}
+
+define void @_Z21struct_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 4, i32 undef, i32 0)
+  ; CHECK: ret void
+  ret void
+}
+
+define i32 @_Z21typed_int_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i32, align 4
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_1t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
+  store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
+  ; CHECK: [[RETLOAD:%.*]] = load i32, ptr [[RETURN]]
+  %0 = load i32, ptr %returnVal, align 4
+  ; CHECK: ret i32 [[RETLOAD]]
+  ret i32 %0
+}
+
+define void @_Z24typed_int_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_1t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: ret void
+  ret void
+}
+
+define i32 @_Z22typed_uint_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i32, align 4
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i32, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer", i32, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
+  store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
+  ; CHECK: [[RETLOAD:%.*]] = load i32, ptr [[RETURN]]
+  %0 = load i32, ptr %returnVal, align 4
+  ; CHECK: ret i32 [[RETLOAD]]
+  ret i32 %0
+}
+
+define void @_Z25typed_uint_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i32, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer", i32, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: ret void
+  ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/interlocked-or64.ll b/llvm/test/CodeGen/DirectX/interlocked-or64.ll
new file mode 100644
index 0000000000000..0aaf170c8eea1
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/interlocked-or64.ll
@@ -0,0 +1,117 @@
+; RUN: opt -S -dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define i64 @_Z20byteaddr_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:%.*]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z23byteaddr_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+%struct.TestStruct = type { i64, i64 }
+
+define i64 @_Z18struct_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 8, i32 undef, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:%.*]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z21struct_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 8, i32 undef, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+define i64 @_Z21typed_int_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_1t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_1t(target("dx.TypedBuffer", i64, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:%.*]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z24typed_int_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_1t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_1t(target("dx.TypedBuffer", i64, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+define i64 @_Z22typed_uint_test_return() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:%.*]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z25typed_uint_test_no_return() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: ret void
+  ret void
+}

>From b13e91b3e6b71f64c5651a34397f762e7622d019 Mon Sep 17 00:00:00 2001
From: Alexander Johnston <alexander.johnston at amd.com>
Date: Wed, 25 Feb 2026 13:05:41 +0000
Subject: [PATCH 2/5] Update from review feedback

Moves some of the interlocked/atomicBinOp logic to the backend and into DXIL.td
With the removal of the 64bit variant of interlocked_or for lowering purposes the
backend test is condensed into one.
Replaces frontend Undef values with Poison, with the intention of being lowered
into Undef in the DXIL backend. The Poison -> Undef replacement is NOT done in
the backend yet.
---
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          |  54 +++----
 .../builtins/Interlocked-or-builtin.hlsl      |  16 +-
 .../CodeGenHLSL/builtins/Interlocked-or.hlsl  |  12 +-
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |   3 +-
 llvm/lib/Target/DirectX/DXIL.td               |  25 ++++
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    |  64 +-------
 llvm/test/CodeGen/DirectX/interlocked-or.ll   | 140 +++++++++++++++++-
 llvm/test/CodeGen/DirectX/interlocked-or64.ll | 117 ---------------
 8 files changed, 193 insertions(+), 238 deletions(-)
 delete mode 100644 llvm/test/CodeGen/DirectX/interlocked-or64.ll

diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 39d716bea91bf..ba96b087e59f6 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -300,23 +300,8 @@ static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF,
   llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
 }
 
-// Not sure where would be best for this to live
-// AtomicBinOp uses an i32 to determine the operation mode as follows
-enum AtomicOperationCode : uint {
-  Add = 0,
-  And = 1,
-  Or = 2,
-  Xor = 3,
-  IMin = 4,
-  IMax = 5,
-  UMin = 6,
-  UMax = 7,
-  Exchange = 8
-};
-
-static Value *handleAtomicBinOp(CodeGenFunction &CGF, const CallExpr *E,
-                                const AtomicOperationCode OpCode,
-                                const bool HasReturn, const bool Is32Bit) {
+static Value *handleInterlockedOr(CodeGenFunction &CGF, const CallExpr *E,
+                                  const bool HasReturn, const bool Is32Bit) {
   Value *HandleOp = CGF.EmitScalarExpr(E->getArg(0));
   Value *IndexOp = CGF.EmitScalarExpr(E->getArg(1));
   Value *StructuredBufIndexOp;
@@ -349,18 +334,13 @@ static Value *handleAtomicBinOp(CodeGenFunction &CGF, const CallExpr *E,
     const HLSLAttributedResourceType *ResourceTy =
         HandleTy->getAs<HLSLAttributedResourceType>();
 
-    // AtomicBinOp uses an i32 to determine the operation mode as follows
-    // Add: 0, And: 1, Or: 2, Xor: 3, IMin: 4, IMax: 5, UMin: 6, UMax: 7,
-    // Exchange: 8
-    Value *ModeConstant = ConstantInt::get(CGF.Int32Ty, OpCode);
-
     // AtomicBinOp has 3 coordinate params which must be handled differently
     // depending on the resource type being accessed.
     // Initially undef all the coordinates then fill as required
-    Value *Undef = UndefValue::get(CGF.Int32Ty);
-    Value *C0 = Undef;
-    Value *C1 = Undef;
-    Value *C2 = Undef;
+    Value *Poison = PoisonValue::get(CGF.Int32Ty);
+    Value *C0 = Poison;
+    Value *C1 = Poison;
+    Value *C2 = Poison;
     if (!ResourceTy->getAttrs().RawBuffer) {
       assert(
           (ResourceTy->getContainedType() == CGF.getContext().IntTy ||
@@ -381,23 +361,23 @@ static Value *handleAtomicBinOp(CodeGenFunction &CGF, const CallExpr *E,
       C0 = IndexOp;
       C1 = StructuredBufIndexOp;
     }
-    assert(C0 != Undef && "Failed to identify coordinates for Interlocked");
+    assert(C0 != Poison && "Failed to identify coordinates for Interlocked");
     // TODO: Add coordinate logic for texture and groupshared
 
     // atomicBinOp
     // opcode, handle, binary operation code, coordinates c0, c1, c2, new val
     if (Is32Bit) {
-      Intrinsic::ID ID = Intrinsic::dx_resource_atomicbinop;
+      Intrinsic::ID ID = Intrinsic::dx_interlocked_or;
       OldValueOp = CGF.Builder.CreateIntrinsic(
           /*ReturnType=*/CGF.Int32Ty, ID,
-          ArrayRef<Value *>{HandleOp, ModeConstant, C0, C1, C2, NewValueOp},
-          nullptr, "hlsl.interlocked.or");
+          ArrayRef<Value *>{HandleOp, C0, C1, C2, NewValueOp}, nullptr,
+          "hlsl.interlocked.or");
     } else {
-      Intrinsic::ID ID = Intrinsic::dx_resource_atomicbinop64;
+      Intrinsic::ID ID = Intrinsic::dx_interlocked_or;
       OldValueOp = CGF.Builder.CreateIntrinsic(
           /*ReturnType=*/CGF.Int64Ty, ID,
-          ArrayRef<Value *>{HandleOp, ModeConstant, C0, C1, C2, NewValueOp},
-          nullptr, "hlsl.interlocked.or");
+          ArrayRef<Value *>{HandleOp, C0, C1, C2, NewValueOp}, nullptr,
+          "hlsl.interlocked.or");
     }
     break;
   }
@@ -1298,16 +1278,16 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(SpecConstantFn, Args);
   }
   case Builtin::BI__builtin_hlsl_interlocked_or: {
-    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, false, true);
+    return handleInterlockedOr(*this, E, false, true);
   }
   case Builtin::BI__builtin_hlsl_interlocked_or64: {
-    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, false, false);
+    return handleInterlockedOr(*this, E, false, false);
   }
   case Builtin::BI__builtin_hlsl_interlocked_or_ret: {
-    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, true, true);
+    return handleInterlockedOr(*this, E, true, true);
   }
   case Builtin::BI__builtin_hlsl_interlocked_or_ret64: {
-    return handleAtomicBinOp(*this, E, AtomicOperationCode::Or, true, false);
+    return handleInterlockedOr(*this, E, true, false);
   }
   }
   return nullptr;
diff --git a/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
index fa2a75f2309e9..b43666c89a45f 100644
--- a/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
@@ -32,9 +32,9 @@ struct CustomResource {
 #ifndef STRUCTURED
 
 // CHECK-LABEL: define {{.*}} i32 @_Z11test_return14CustomResource(
-// CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
-// CHECK-INTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
-// CHECK-UINTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer", i32, 1, 0, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
+// CHECK-INTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer", i32, 1, 0, 1) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
+// CHECK-UINTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer", i32, 1, 0, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
 // CHECK-NEXT: store i32 %hlsl.interlocked.or, ptr [[returnVal:%.*]], align 4
 // CHECK-NEXT: [[loadedReturnVal:%.*]] = load i32, ptr [[returnVal]], align 4
 // CHECK-NEXT: ret i32 [[loadedReturnVal]]
@@ -45,9 +45,9 @@ unsigned int test_return(CustomResource cr) {
 }
 
 // CHECK-LABEL: define {{.*}} void @_Z14test_no_return14CustomResource(
-// CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
-// CHECK-INTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
-// CHECK-UINTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer", i32, 1, 0, 0) {{%.*}}, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+// CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
+// CHECK-INTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer", i32, 1, 0, 1) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
+// CHECK-UINTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer", i32, 1, 0, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
 // CHECK-NEXT: ret void
 void test_no_return(CustomResource h) {
   __builtin_hlsl_interlocked_or(h.h, 1u, 0u);
@@ -56,7 +56,7 @@ void test_no_return(CustomResource h) {
 #else
 
 // CHECK-STRUCTURED-LABEL: define {{.*}} i32 @_Z11test_return14CustomResource(
-// CHECK-STRUCTURED: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %0, i32 2, i32 1, i32 4, i32 undef, i32 0)
+// CHECK-STRUCTURED: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_s_struct.TestStructs_1_0t.i32(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %0, i32 1, i32 4, i32 poison, i32 0)
 // CHECK-STRUCTURED-NEXT: store i32 %hlsl.interlocked.or, ptr [[returnVal:%.*]], align 4
 // CHECK-STRUCTURED-NEXT: [[loadedReturnVal:%.*]] = load i32, ptr [[returnVal]], align 4
 // CHECK-STRUCTURED-NEXT: ret i32 [[loadedReturnVal]]
@@ -67,7 +67,7 @@ unsigned int test_return(CustomResource cr) {
 }
 
 // CHECK-STRUCTURED-LABEL: define {{.*}} void @_Z14test_no_return14CustomResource(
-// CHECK-STRUCTURED: %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %0, i32 2, i32 1, i32 4, i32 undef, i32 0)
+// CHECK-STRUCTURED: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_s_struct.TestStructs_1_0t.i32(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %0, i32 1, i32 4, i32 poison, i32 0)
 // CHECK-STRUCTURED-NEXT: ret void
 void test_no_return(CustomResource h) {
   __builtin_hlsl_interlocked_or(h.h, 1u, 4u, 0u);
diff --git a/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl b/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
index 85caf9aeb4120..b6611ff0800ae 100644
--- a/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
@@ -28,7 +28,7 @@ uint test_return() {
 // CHECK-32: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
 // CHECK-32: [[dest:%.*]] = load i32, ptr %dest.addr
 // CHECK-32: [[newval:%.*]] = load i32, ptr %value.addr
-// CHECK-32: [[result:%.*]] = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i32 [[newval]])
+// CHECK-32: [[result:%.*]] = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i32 [[newval]])
 // CHECK-32: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
 // CHECK-32: store i32 [[result]], ptr [[loaded_orig_val_ptr]]
 
@@ -46,10 +46,10 @@ void test_no_return() {
 // CHECK-32: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
 // CHECK-32: [[dest:%.*]] = load i32, ptr %dest.addr
 // CHECK-32: [[newval:%.*]] = load i32, ptr %value.addr
-// CHECK-32: {{%.*}} = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i32 [[newval]])
+// CHECK-32: {{%.*}} = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i32 [[newval]])
 // CHECK-32: ret void
 
-// CHECK-32: declare i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0), i32, i32, i32, i32, i32)
+// CHECK-32: declare i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0), i32, i32, i32, i32)
 
 #endif
 
@@ -72,7 +72,7 @@ uint64_t test_return64() {
 // CHECK-64: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
 // CHECK-64: [[dest:%.*]] = load i32, ptr %dest.addr
 // CHECK-64: [[newval:%.*]] = load i64, ptr %value.addr
-// CHECK-64: [[result:%.*]] = call i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i64 [[newval]])
+// CHECK-64: [[result:%.*]] = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i64 [[newval]])
 // CHECK-64: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
 // CHECK-64: store i64 [[result]], ptr [[loaded_orig_val_ptr]]
 
@@ -90,9 +90,9 @@ void test_no_return64() {
 // CHECK-64: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
 // CHECK-64: [[dest:%.*]] = load i32, ptr %dest.addr
 // CHECK-64: [[newval:%.*]] = load i64, ptr %value.addr
-// CHECK-64: {{.*}} = call i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 2, i32 [[dest]], i32 undef, i32 undef, i64 [[newval]])
+// CHECK-64: {{.*}} = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i64 [[newval]])
 // CHECK-64: ret void
 
-// CHECK-64: declare i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0), i32, i32, i32, i32, i64)
+// CHECK-64: declare i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0), i32, i32, i32, i64)
 
 #endif
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 27b9aafed5218..2e4dbf2886c2e 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -94,8 +94,7 @@ def int_dx_resource_sample_clamp
 // Cast between target extension handle types and dxil-style opaque handles
 def int_dx_resource_casthandle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
 
-def int_dx_resource_atomicbinop: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrWriteMem]>;
-def int_dx_resource_atomicbinop64: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [IntrWriteMem]>;
+def int_dx_interlocked_or: DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrWriteMem]>;
 
 def int_dx_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>;
 def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index a88bade555517..e3d8037e33853 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -317,12 +317,23 @@ defvar WaveOpKind_Product = 1;
 defvar WaveOpKind_Min = 2;
 defvar WaveOpKind_Max = 3;
 
+defvar AtomicBinOp_Add = 0;
+defvar AtomicBinOp_And = 1;
+defvar AtomicBinOp_Or = 2;
+defvar AtomicBinOp_Xor = 3;
+defvar AtomicBinOp_IMin = 4;
+defvar AtomicBinOp_IMax = 5;
+defvar AtomicBinOp_UMin = 6;
+defvar AtomicBinOp_UMax = 7;
+defvar AtomicBinOp_Exchange = 8;
+
 defvar SignedOpKind_Signed = 0;
 defvar SignedOpKind_Unsigned = 1;
 
 // Intrinsic arg selection
 class IntrinArgSelectType;
 def IntrinArgSelect_Index : IntrinArgSelectType;
+def IntrinArgSelect_HandleIndex : IntrinArgSelectType;
 def IntrinArgSelect_I8 : IntrinArgSelectType;
 def IntrinArgSelect_I32 : IntrinArgSelectType;
 
@@ -332,6 +343,7 @@ class IntrinArgSelect<IntrinArgSelectType type_, int value_> {
 }
 
 class IntrinArgIndex<int index> : IntrinArgSelect<IntrinArgSelect_Index, index>;
+class IntrinArgHandleIndex<int index> : IntrinArgSelect<IntrinArgSelect_HandleIndex, index>;
 class IntrinArgI8<int value> : IntrinArgSelect<IntrinArgSelect_I8, value>;
 class IntrinArgI32<int value> : IntrinArgSelect<IntrinArgSelect_I32, value>;
 
@@ -351,6 +363,14 @@ class IntrinArgI32<int value> : IntrinArgSelect<IntrinArgSelect_I32, value>;
 //     >,
 //   ]
 //=========================================================================================
+// Using IntrinArgHandleIndex<>, handle arguments of the intrinsic can be copied to the
+// DXIL op in any chosen order, with the required handle cast inserted automatically:
+//   let intrinsics = [
+//     IntrinSelect<int_dx_my_intrinsic,
+//       [ IntrinArgHandleIndex<2>, IntrinArgHandleIndex<1>, IntrinArgHandleIndex<0> ]
+//     >,
+//   ]
+//=========================================================================================
 // Using IntrinArgI8<> and IntrinArgI32<>, integer constants can be added
 // directly to the dxil op. This can be used in conjunction with
 // IntrinArgIndex:
@@ -912,6 +932,11 @@ def GetDimensions : DXILOp<72, getDimensions> {
 
 def AtomicBinOp : DXILOp<78, atomicBinOp> {
   let Doc = "performs an atomic operation on a value in memory";
+  let intrinsics = [
+    IntrinSelect<int_dx_interlocked_or,
+    [ IntrinArgHandleIndex<0>, IntrinArgI32<AtomicBinOp_Or>, IntrinArgIndex<1>,
+      IntrinArgIndex<2>, IntrinArgIndex<3>, IntrinArgIndex<4> ]>
+  ];
   let arguments = [HandleTy, Int32Ty, Int32Ty, Int32Ty, Int32Ty, OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty, Int64Ty]>];
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 55117340d75da..d6f4fbebccd51 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -119,6 +119,10 @@ class OpLowerer {
           case IntrinArgSelect::Type::Index:
             Args.push_back(CI->getArgOperand(A.Value));
             break;
+          case IntrinArgSelect::Type::HandleIndex:
+            Args.push_back(createTmpHandleCast(CI->getArgOperand(A.Value),
+                                               OpBuilder.getHandleType()));
+            break;
           case IntrinArgSelect::Type::I8:
             Args.push_back(OpBuilder.getIRB().getInt8((uint8_t)A.Value));
             break;
@@ -920,60 +924,6 @@ class OpLowerer {
     });
   }
 
-  [[nodiscard]] bool lowerAtomicBinOp(Function &F) {
-    IRBuilder<> &IRB = OpBuilder.getIRB();
-    Type *RetTy = IRB.getInt32Ty();
-
-    return replaceFunction(F, [&](CallInst *CI) -> Error {
-      IRB.SetInsertPoint(CI);
-      Value *Handle =
-          createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
-      Value *OperationMode = CI->getArgOperand(1);
-      Value *Index1 = CI->getArgOperand(2);
-      Value *Index2 = CI->getArgOperand(3);
-      Value *Index3 = CI->getArgOperand(4);
-      Value *NewVal = CI->getArgOperand(5);
-      SmallVector<Value *> Args{Handle, OperationMode, Index1,
-                                Index2, Index3,        NewVal};
-
-      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
-          dxil::OpCode::AtomicBinOp, Args, CI->getName(), RetTy);
-      if (Error E = OpCall.takeError())
-        return E;
-
-      CI->replaceAllUsesWith(*OpCall);
-      CI->eraseFromParent();
-      return Error::success();
-    });
-  }
-
-  [[nodiscard]] bool lowerAtomicBinOp64(Function &F) {
-    IRBuilder<> &IRB = OpBuilder.getIRB();
-    Type *RetTy = IRB.getInt64Ty();
-
-    return replaceFunction(F, [&](CallInst *CI) -> Error {
-      IRB.SetInsertPoint(CI);
-      Value *Handle =
-          createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
-      Value *OperationMode = CI->getArgOperand(1);
-      Value *Index1 = CI->getArgOperand(2);
-      Value *Index2 = CI->getArgOperand(3);
-      Value *Index3 = CI->getArgOperand(4);
-      Value *NewVal = CI->getArgOperand(5);
-      SmallVector<Value *> Args{Handle, OperationMode, Index1,
-                                Index2, Index3,        NewVal};
-
-      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
-          dxil::OpCode::AtomicBinOp, Args, CI->getName(), RetTy);
-      if (Error E = OpCall.takeError())
-        return E;
-
-      CI->replaceAllUsesWith(*OpCall);
-      CI->eraseFromParent();
-      return Error::success();
-    });
-  }
-
   bool lowerIntrinsics() {
     bool Updated = false;
     bool HasErrors = false;
@@ -1060,12 +1010,6 @@ class OpLowerer {
       case Intrinsic::is_fpclass:
         HasErrors |= lowerIsFPClass(F);
         break;
-      case Intrinsic::dx_resource_atomicbinop:
-        HasErrors |= lowerAtomicBinOp(F);
-        break;
-      case Intrinsic::dx_resource_atomicbinop64:
-        HasErrors |= lowerAtomicBinOp64(F);
-        break;
       }
       Updated = true;
     }
diff --git a/llvm/test/CodeGen/DirectX/interlocked-or.ll b/llvm/test/CodeGen/DirectX/interlocked-or.ll
index f53a0a296ebda..ef03d7c67b758 100644
--- a/llvm/test/CodeGen/DirectX/interlocked-or.ll
+++ b/llvm/test/CodeGen/DirectX/interlocked-or.ll
@@ -10,7 +10,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
   ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
-  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
   store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
   ; CHECK: [[RETLOAD:]] = load i32, ptr [[RETURN]]
@@ -25,7 +25,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
   ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
-  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: ret void
   ret void
 }
@@ -40,7 +40,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
   ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 undef, i32 0)
-  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 4, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_s_struct.TestStructs_1_0t.i32(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %buffer, i32 1, i32 4, i32 poison, i32 0)
   ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
   store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
   ; CHECK: [[RETLOAD:]] = load i32, ptr [[RETURN]]
@@ -55,7 +55,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
   ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 undef, i32 0)
-  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 4, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_s_struct.TestStructs_1_0t.i32(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %buffer,  i32 1, i32 4, i32 poison, i32 0)
   ; CHECK: ret void
   ret void
 }
@@ -68,7 +68,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
   ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
-  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer", i32, 1, 0, 1) %buffer,  i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
   store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
   ; CHECK: [[RETLOAD:]] = load i32, ptr [[RETURN]]
@@ -83,7 +83,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
   ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
-  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer", i32, 1, 0, 1) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: ret void
   ret void
 }
@@ -96,7 +96,7 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i32, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
   ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
-  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer", i32, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer", i32, 1, 0, 0) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
   store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
   ; CHECK: [[RETLOAD:]] = load i32, ptr [[RETURN]]
@@ -111,7 +111,131 @@ entry:
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i32, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
   ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
-  %hlsl.interlocked.or = call i32 @llvm.dx.resource.atomicbinop.tdx.TypedBuffer_i32_1_0_0t(target("dx.TypedBuffer", i32, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer", i32, 1, 0, 0) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: ret void
   ret void
 }
+
+define i64 @_Z22byteaddr_test_return64() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 1, i32 poison, i32 poison, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z25byteaddr_test_no_return64() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 1, i32 poison, i32 poison, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+%struct.TestStruct64 = type { i64, i64 }
+
+define i64 @_Z20struct_test_return64() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", %struct.TestStruct64, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStruct64s_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_s_struct.TestStruct64s_1_0t(target("dx.RawBuffer", %struct.TestStruct64, 1, 0) %buffer, i32 1, i32 8, i32 poison, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z23struct_test_no_return64() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.RawBuffer", %struct.TestStruct64, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStruct64s_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_s_struct.TestStruct64s_1_0t(target("dx.RawBuffer", %struct.TestStruct64, 1, 0) %buffer, i32 1, i32 8, i32 poison, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+define i64 @_Z23typed_int_test_return64() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_1t(target("dx.TypedBuffer", i64, 1, 0, 1) %buffer, i32 1, i32 poison, i32 poison, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z26typed_int_test_no_return64() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_1t(target("dx.TypedBuffer", i64, 1, 0, 1) %buffer, i32 1, i32 poison, i32 poison, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+define i64 @_Z24typed_uint_test_return64() {
+entry:
+  ; CHECK: [[RETURN:%.*]] = alloca
+  %returnVal = alloca i64, align 8
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 1, i32 poison, i32 poison, i64 0)
+  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
+  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
+  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
+  %0 = load i64, ptr %returnVal, align 8
+  ; CHECK: ret i64 [[RETLOAD]]
+  ret i64 %0
+}
+
+define void @_Z27typed_uint_test_no_return64() {
+entry:
+  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
+  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
+  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 1, i32 poison, i32 poison, i64 0)
+  ; CHECK: ret void
+  ret void
+}
+
+declare i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) %0, i32 %1, i32 %2, i32 %3, i32 %4)
+declare i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_s_struct.TestStructs_1_0t.i32(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %0, i32 %1, i32 %2, i32 %3, i32 %4)
+declare i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer", i32, 1, 0, 1) %0, i32 %1, i32 %2, i32 %3, i32 %4)
+declare i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer", i32, 1, 0, 0) %0, i32 %1, i32 %2, i32 %3, i32 %4)
+
+declare i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0) %0, i32 %1, i32 %2, i32 %3, i64 %4)
+declare i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_s_struct.TestStruct64s_1_0t.i64(target("dx.RawBuffer", %struct.TestStruct64, 1, 0) %0, i32 %1, i32 %2, i32 %3, i64 %4)
+declare i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_1t.i64(target("dx.TypedBuffer", i64, 1, 0, 1) %0, i32 %1, i32 %2, i32 %3, i64 %4)
+declare i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_0t.i64(target("dx.TypedBuffer", i64, 1, 0, 0) %0, i32 %1, i32 %2, i32 %3, i64 %4)
diff --git a/llvm/test/CodeGen/DirectX/interlocked-or64.ll b/llvm/test/CodeGen/DirectX/interlocked-or64.ll
deleted file mode 100644
index 0aaf170c8eea1..0000000000000
--- a/llvm/test/CodeGen/DirectX/interlocked-or64.ll
+++ /dev/null
@@ -1,117 +0,0 @@
-; RUN: opt -S -dxil-op-lower %s | FileCheck %s
-
-target triple = "dxil-pc-shadermodel6.6-compute"
-
-define i64 @_Z20byteaddr_test_return() {
-entry:
-  ; CHECK: [[RETURN:%.*]] = alloca
-  %returnVal = alloca i64, align 8
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
-  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
-  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
-  %0 = load i64, ptr %returnVal, align 8
-  ; CHECK; ret i64 [[RETLOAD]]
-  ret i64 %0
-}
-
-define void @_Z23byteaddr_test_no_return() {
-entry:
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  ; CHECK: ret void
-  ret void
-}
-
-%struct.TestStruct = type { i64, i64 }
-
-define i64 @_Z18struct_test_return() {
-entry:
-  ; CHECK: [[RETURN:%.*]] = alloca
-  %returnVal = alloca i64, align 8
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 8, i32 undef, i64 0)
-  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
-  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
-  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
-  %0 = load i64, ptr %returnVal, align 8
-  ; CHECK; ret i64 [[RETLOAD]]
-  ret i64 %0
-}
-
-define void @_Z21struct_test_no_return() {
-entry:
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.RawBuffer_s_struct.TestStructs_1_0t(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %buffer, i32 2, i32 1, i32 8, i32 undef, i64 0)
-  ; CHECK: ret void
-  ret void
-}
-
-define i64 @_Z21typed_int_test_return() {
-entry:
-  ; CHECK: [[RETURN:%.*]] = alloca
-  %returnVal = alloca i64, align 8
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_1t(target("dx.TypedBuffer", i64, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
-  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
-  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
-  %0 = load i64, ptr %returnVal, align 8
-  ; CHECK; ret i64 [[RETLOAD]]
-  ret i64 %0
-}
-
-define void @_Z24typed_int_test_no_return() {
-entry:
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_1t(target("dx.TypedBuffer", i64, 1, 0, 1) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  ; CHECK: ret void
-  ret void
-}
-
-define i64 @_Z22typed_uint_test_return() {
-entry:
-  ; CHECK: [[RETURN:%.*]] = alloca
-  %returnVal = alloca i64, align 8
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
-  store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
-  ; CHECK: [[RETLOAD:]] = load i64, ptr [[RETURN]]
-  %0 = load i64, ptr %returnVal, align 8
-  ; CHECK; ret i64 [[RETLOAD]]
-  ret i64 %0
-}
-
-define void @_Z25typed_uint_test_no_return() {
-entry:
-  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
-  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
-  %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  %hlsl.interlocked.or = call i64 @llvm.dx.resource.atomicbinop64.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 2, i32 1, i32 undef, i32 undef, i64 0)
-  ; CHECK: ret void
-  ret void
-}

>From f77b3eed1f3d4e678ccb4f0c5149f365e2946c11 Mon Sep 17 00:00:00 2001
From: Alexander Johnston <alexander.johnston at amd.com>
Date: Thu, 26 Feb 2026 15:20:16 +0000
Subject: [PATCH 3/5] Further cleanup based on review

---
 clang/include/clang/Basic/Builtins.td | 11 +++++++----
 clang/lib/CodeGen/CGHLSLBuiltins.cpp  | 24 +++++++++---------------
 clang/lib/Sema/SemaHLSL.cpp           | 10 ++++------
 3 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 467bd358b836d..8cfd06fa71446 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5439,10 +5439,11 @@ def HLSLInterlockedOr : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
-def HLSLInterlockedOrRet : LangBuiltin<"HLSL_LANG"> {
+def HLSLInterlockedOrRet : LangBuiltin<"HLSL_LANG">,
+    Template<["unsigned int", "int"], ["_uint", "_int"]> {
   let Spellings = ["__builtin_hlsl_interlocked_or_ret"];
   let Attributes = [NoThrow, CustomTypeChecking];
-  let Prototype = "void(...)";
+  let Prototype = "T(...)";
 }
 
 def HLSLInterlockedOr64 : LangBuiltin<"HLSL_LANG"> {
@@ -5451,10 +5452,12 @@ def HLSLInterlockedOr64 : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
-def HLSLInterlockedOrRet64 : LangBuiltin<"HLSL_LANG"> {
+def HLSLInterlockedOrRet64 : LangBuiltin<"HLSL_LANG">,
+    Template<["unsigned long long int", "long long int"],
+             ["_ulonglong",             "_longlong"]> {
   let Spellings = ["__builtin_hlsl_interlocked_or_ret64"];
   let Attributes = [NoThrow, CustomTypeChecking];
-  let Prototype = "void(...)";
+  let Prototype = "T(...)";
 }
 
 // Builtins for XRay.
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 5dca02e5ebe89..02769795c996a 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -366,19 +366,11 @@ static Value *handleInterlockedOr(CodeGenFunction &CGF, const CallExpr *E,
 
     // atomicBinOp
     // opcode, handle, binary operation code, coordinates c0, c1, c2, new val
-    if (Is32Bit) {
-      Intrinsic::ID ID = Intrinsic::dx_interlocked_or;
-      OldValueOp = CGF.Builder.CreateIntrinsic(
-          /*ReturnType=*/CGF.Int32Ty, ID,
-          ArrayRef<Value *>{HandleOp, C0, C1, C2, NewValueOp}, nullptr,
-          "hlsl.interlocked.or");
-    } else {
-      Intrinsic::ID ID = Intrinsic::dx_interlocked_or;
-      OldValueOp = CGF.Builder.CreateIntrinsic(
-          /*ReturnType=*/CGF.Int64Ty, ID,
-          ArrayRef<Value *>{HandleOp, C0, C1, C2, NewValueOp}, nullptr,
-          "hlsl.interlocked.or");
-    }
+    llvm::Type *ReturnType = Is32Bit ? CGF.Int32Ty : CGF.Int64Ty;
+    OldValueOp = CGF.Builder.CreateIntrinsic(
+        ReturnType, Intrinsic::dx_interlocked_or,
+        ArrayRef<Value *>{HandleOp, C0, C1, C2, NewValueOp}, nullptr,
+        "hlsl.interlocked.or");
     break;
   }
   default:
@@ -1427,10 +1419,12 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
   case Builtin::BI__builtin_hlsl_interlocked_or64: {
     return handleInterlockedOr(*this, E, false, false);
   }
-  case Builtin::BI__builtin_hlsl_interlocked_or_ret: {
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret_int:
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret_uint: {
     return handleInterlockedOr(*this, E, true, true);
   }
-  case Builtin::BI__builtin_hlsl_interlocked_or_ret64: {
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret64_longlong:
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret64_ulonglong: {
     return handleInterlockedOr(*this, E, true, false);
   }
   }
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index b14092716e589..2fd52b7e00011 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -4061,7 +4061,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     TheCall->setType(SemaRef.getASTContext().VoidTy);
     break;
   }
-  case Builtin::BI__builtin_hlsl_interlocked_or_ret: {
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret_int:
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret_uint: {
     if (SemaRef.checkArgCountRange(TheCall, 4, 5))
       return true;
     auto checkResTy = [this](const HLSLAttributedResourceType *ResTy) -> bool {
@@ -4094,8 +4095,6 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(4),
                               SemaRef.getASTContext().UnsignedIntTy))
         return true;
-
-    TheCall->setType(SemaRef.getASTContext().UnsignedIntTy);
     break;
   }
   case Builtin::BI__builtin_hlsl_interlocked_or64: {
@@ -4140,7 +4139,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     TheCall->setType(SemaRef.getASTContext().VoidTy);
     break;
   }
-  case Builtin::BI__builtin_hlsl_interlocked_or_ret64: {
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret64_longlong:
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret64_ulonglong: {
     if (SemaRef.checkArgCountRange(TheCall, 4, 5))
       return true;
     if (CheckShaderModelVersion(&SemaRef, TheCall, VersionTuple(6, 6)))
@@ -4180,8 +4180,6 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
                               SemaRef.getASTContext().UnsignedLongTy))
         return true;
     }
-
-    TheCall->setType(SemaRef.getASTContext().UnsignedLongTy);
     break;
   }
   }

>From ba0ab2c740d3c50aef43811f67655c3a30d2bd56 Mon Sep 17 00:00:00 2001
From: Alexander Johnston <alexander.johnston at amd.com>
Date: Tue, 3 Mar 2026 23:11:43 +0000
Subject: [PATCH 4/5] Refactor clang hlsl interlocked_or builtins

Condense the interlocked_or builtins to remove the 64 variants.
Introduce proper return types on the _ret variant.
Condense and remove duplicate code in SemaHLSL.
---
 clang/include/clang/AST/TypeBase.h            |   3 +
 clang/include/clang/Basic/Builtins.td         |  17 +-
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          |  25 +-
 clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp |  42 +++-
 clang/lib/Sema/SemaHLSL.cpp                   | 217 +++++++++---------
 .../builtins/Interlocked-or-builtin.hlsl      |  34 ++-
 .../CodeGenHLSL/builtins/Interlocked-or.hlsl  | 182 +++++++++------
 .../BuiltIns/interlocked-or-errors.hlsl       |  32 +--
 .../BuiltIns/interlocked-or64-errors.hlsl     |  76 +-----
 9 files changed, 325 insertions(+), 303 deletions(-)

diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h
index 9402469f5e12b..f090db7c9f6bf 100644
--- a/clang/include/clang/AST/TypeBase.h
+++ b/clang/include/clang/AST/TypeBase.h
@@ -6801,6 +6801,9 @@ class HLSLAttributedResourceType : public Type, public llvm::FoldingSetNode {
   const Attributes &getAttrs() const { return Attrs; }
   bool isRaw() const { return Attrs.RawBuffer; }
   bool isStructured() const { return !ContainedType->isChar8Type(); }
+  bool isTexture() const {
+    return Attrs.ResourceDimension != llvm::dxil::ResourceDimension::Unknown;
+  }
 
   bool isSugared() const { return false; }
   QualType desugar() const { return QualType(this, 0); }
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 8cfd06fa71446..2bf7d94c3fce7 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5440,26 +5440,13 @@ def HLSLInterlockedOr : LangBuiltin<"HLSL_LANG"> {
 }
 
 def HLSLInterlockedOrRet : LangBuiltin<"HLSL_LANG">,
-    Template<["unsigned int", "int"], ["_uint", "_int"]> {
+    Template<["unsigned int", "int", "unsigned long int", "long int"],
+             ["_uint", "_int", "_ull", "_ll"]> {
   let Spellings = ["__builtin_hlsl_interlocked_or_ret"];
   let Attributes = [NoThrow, CustomTypeChecking];
   let Prototype = "T(...)";
 }
 
-def HLSLInterlockedOr64 : LangBuiltin<"HLSL_LANG"> {
-  let Spellings = ["__builtin_hlsl_interlocked_or64"];
-  let Attributes = [NoThrow, CustomTypeChecking];
-  let Prototype = "void(...)";
-}
-
-def HLSLInterlockedOrRet64 : LangBuiltin<"HLSL_LANG">,
-    Template<["unsigned long long int", "long long int"],
-             ["_ulonglong",             "_longlong"]> {
-  let Spellings = ["__builtin_hlsl_interlocked_or_ret64"];
-  let Attributes = [NoThrow, CustomTypeChecking];
-  let Prototype = "T(...)";
-}
-
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 02769795c996a..2a4238a41e0bf 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -301,7 +301,9 @@ static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF,
 }
 
 static Value *handleInterlockedOr(CodeGenFunction &CGF, const CallExpr *E,
-                                  const bool HasReturn, const bool Is32Bit) {
+                                  const bool HasReturn) {
+  const bool Is32Bit = CGF.getContext().getTypeSize(
+                           E->getArg(E->getNumArgs() - 1)->getType()) == 32;
   Value *HandleOp = CGF.EmitScalarExpr(E->getArg(0));
   Value *IndexOp = CGF.EmitScalarExpr(E->getArg(1));
   Value *StructuredBufIndexOp;
@@ -344,8 +346,10 @@ static Value *handleInterlockedOr(CodeGenFunction &CGF, const CallExpr *E,
     if (!ResourceTy->getAttrs().RawBuffer) {
       assert(
           (ResourceTy->getContainedType() == CGF.getContext().IntTy ||
-           ResourceTy->getContainedType() == CGF.getContext().UnsignedIntTy) &&
-          "AtomicBinOp RWBuffer must contain int or uint");
+           ResourceTy->getContainedType() == CGF.getContext().UnsignedIntTy ||
+           ResourceTy->getContainedType() == CGF.getContext().LongTy ||
+           ResourceTy->getContainedType() == CGF.getContext().UnsignedLongTy) &&
+          "AtomicBinOp RWBuffer must contain 32 or 64bit (unsigned) int type");
       // RWBuffer: c0
       C0 = IndexOp;
 
@@ -1414,18 +1418,13 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(SpecConstantFn, Args);
   }
   case Builtin::BI__builtin_hlsl_interlocked_or: {
-    return handleInterlockedOr(*this, E, false, true);
-  }
-  case Builtin::BI__builtin_hlsl_interlocked_or64: {
-    return handleInterlockedOr(*this, E, false, false);
+    return handleInterlockedOr(*this, E, false);
   }
   case Builtin::BI__builtin_hlsl_interlocked_or_ret_int:
-  case Builtin::BI__builtin_hlsl_interlocked_or_ret_uint: {
-    return handleInterlockedOr(*this, E, true, true);
-  }
-  case Builtin::BI__builtin_hlsl_interlocked_or_ret64_longlong:
-  case Builtin::BI__builtin_hlsl_interlocked_or_ret64_ulonglong: {
-    return handleInterlockedOr(*this, E, true, false);
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret_uint:
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret_ll:
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret_ull: {
+    return handleInterlockedOr(*this, E, true);
   }
   }
   return nullptr;
diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
index 19c3775e2744c..9a5f41d9607fc 100644
--- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
+++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp
@@ -1845,6 +1845,7 @@ BuiltinTypeDeclBuilder::addInterlockedMethodsForBuffer() {
   using PH = BuiltinTypeMethodBuilder::PlaceHolder;
   ASTContext &AST = SemaRef.getASTContext();
   QualType UIntTy = AST.UnsignedIntTy;
+  QualType IntTy = AST.IntTy;
 
   BuiltinTypeMethodBuilder(*this, "InterlockedOr", AST.VoidTy)
       .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
@@ -1853,11 +1854,26 @@ BuiltinTypeDeclBuilder::addInterlockedMethodsForBuffer() {
                    PH::_0, PH::_1)
       .finalize();
 
-  return BuiltinTypeMethodBuilder(*this, "InterlockedOr", AST.VoidTy)
+  BuiltinTypeMethodBuilder(*this, "InterlockedOr", AST.VoidTy)
+      .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("value", IntTy, HLSLParamModifierAttr::Keyword_in)
+      .callBuiltin("__builtin_hlsl_interlocked_or", QualType(), PH::Handle,
+                   PH::_0, PH::_1)
+      .finalize();
+
+  BuiltinTypeMethodBuilder(*this, "InterlockedOr", AST.VoidTy)
       .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
       .addParam("value", UIntTy, HLSLParamModifierAttr::Keyword_in)
       .addParam("original_value", UIntTy, HLSLParamModifierAttr::Keyword_out)
-      .callBuiltin("__builtin_hlsl_interlocked_or_ret", UIntTy, PH::Handle,
+      .callBuiltin("__builtin_hlsl_interlocked_or_ret_uint", UIntTy, PH::Handle,
+                   PH::_0, PH::_1, PH::_2)
+      .finalize();
+
+  return BuiltinTypeMethodBuilder(*this, "InterlockedOr", AST.VoidTy)
+      .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("value", IntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("original_value", IntTy, HLSLParamModifierAttr::Keyword_out)
+      .callBuiltin("__builtin_hlsl_interlocked_or_ret_int", IntTy, PH::Handle,
                    PH::_0, PH::_1, PH::_2)
       .finalize();
 }
@@ -1873,19 +1889,35 @@ BuiltinTypeDeclBuilder::addInterlocked64MethodsForBuffer() {
   using PH = BuiltinTypeMethodBuilder::PlaceHolder;
   QualType UIntTy = AST.UnsignedIntTy;
   QualType ULongTy = AST.UnsignedLongTy;
+  QualType LongTy = AST.LongTy;
 
   BuiltinTypeMethodBuilder(*this, "InterlockedOr64", AST.VoidTy)
       .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
       .addParam("value", ULongTy, HLSLParamModifierAttr::Keyword_in)
-      .callBuiltin("__builtin_hlsl_interlocked_or64", QualType(), PH::Handle,
+      .callBuiltin("__builtin_hlsl_interlocked_or", QualType(), PH::Handle,
                    PH::_0, PH::_1)
       .finalize();
 
-  return BuiltinTypeMethodBuilder(*this, "InterlockedOr64", AST.VoidTy)
+  BuiltinTypeMethodBuilder(*this, "InterlockedOr64", AST.VoidTy)
+      .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("value", LongTy, HLSLParamModifierAttr::Keyword_in)
+      .callBuiltin("__builtin_hlsl_interlocked_or", QualType(), PH::Handle,
+                   PH::_0, PH::_1)
+      .finalize();
+
+  BuiltinTypeMethodBuilder(*this, "InterlockedOr64", AST.VoidTy)
       .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
       .addParam("value", ULongTy, HLSLParamModifierAttr::Keyword_in)
       .addParam("original_value", ULongTy, HLSLParamModifierAttr::Keyword_out)
-      .callBuiltin("__builtin_hlsl_interlocked_or_ret64", ULongTy, PH::Handle,
+      .callBuiltin("__builtin_hlsl_interlocked_or_ret_ull", ULongTy, PH::Handle,
+                   PH::_0, PH::_1, PH::_2)
+      .finalize();
+
+  return BuiltinTypeMethodBuilder(*this, "InterlockedOr64", AST.VoidTy)
+      .addParam("dest", UIntTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("value", LongTy, HLSLParamModifierAttr::Keyword_in)
+      .addParam("original_value", LongTy, HLSLParamModifierAttr::Keyword_out)
+      .callBuiltin("__builtin_hlsl_interlocked_or_ret_ll", LongTy, PH::Handle,
                    PH::_0, PH::_1, PH::_2)
       .finalize();
 }
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 2fd52b7e00011..42f9d51023bbd 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2999,6 +2999,25 @@ static bool CheckArgTypeMatches(Sema *S, Expr *Arg, QualType ExpectedType) {
   return false;
 }
 
+// Checks that Arg's canonical type matches one of ExpectedTypes; diagnoses
+static bool CheckArgTypeMatchesList(Sema *S, Expr *Arg,
+                                    llvm::SmallVector<QualType> ExpectedTypes) {
+  QualType ArgType = Arg->getType().getCanonicalType();
+  bool MatchedType = false;
+  for (const auto ExpectedType : ExpectedTypes)
+    if (ArgType == ExpectedType) {
+      MatchedType = true;
+      return false;
+    }
+  if (!MatchedType) {
+    for (const auto ExpectedType : ExpectedTypes)
+      S->Diag(Arg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
+          << ArgType << ExpectedType << 1 << 0 << 0;
+    return true;
+  }
+  return false;
+}
+
 static bool CheckAllArgTypesAreCorrect(
     Sema *S, CallExpr *TheCall,
     llvm::function_ref<bool(Sema *S, SourceLocation Loc, int ArgOrdinal,
@@ -4023,102 +4042,50 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   case Builtin::BI__builtin_hlsl_interlocked_or: {
     if (SemaRef.checkArgCountRange(TheCall, 3, 4))
       return true;
-    auto checkResTy = [this](const HLSLAttributedResourceType *ResTy) -> bool {
-      bool IsValid = false;
-      const ASTContext &AST = SemaRef.getASTContext();
-      // The resource handle must be either
-      // RWByteAddressBuffer or RWStructuredBuffer
-      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
-                 ResTy->isRaw() && ResTy->hasContainedType();
-      // RWBuffer<int> or RWBuffer<uint>
-      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
-                 !ResTy->isRaw() && ResTy->hasContainedType() &&
-                 (ResTy->getContainedType() == AST.IntTy ||
-                  ResTy->getContainedType() == AST.UnsignedIntTy);
-      // RWTexture<int> or RWTexture<uint> (any dimension)
-      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
-                 !ResTy->isRaw() &&
-                 ResTy->getAttrs().ResourceDimension !=
-                     llvm::dxil::ResourceDimension::Unknown &&
-                 (ResTy->getContainedType() == AST.IntTy ||
-                  ResTy->getContainedType() == AST.UnsignedIntTy);
-      return !IsValid;
-    };
-    if (CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy))
-      return true;
-
-    if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1),
-                            SemaRef.getASTContext().UnsignedIntTy) ||
-        CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
-                            SemaRef.getASTContext().UnsignedIntTy))
-      return true;
-    // We will have a second index if handling a RWStructuredBuffer
-    if (TheCall->getNumArgs() == 4)
-      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(3),
-                              SemaRef.getASTContext().UnsignedIntTy))
-        return true;
+    const ASTContext &AST = SemaRef.getASTContext();
 
-    TheCall->setType(SemaRef.getASTContext().VoidTy);
-    break;
-  }
-  case Builtin::BI__builtin_hlsl_interlocked_or_ret_int:
-  case Builtin::BI__builtin_hlsl_interlocked_or_ret_uint: {
-    if (SemaRef.checkArgCountRange(TheCall, 4, 5))
-      return true;
-    auto checkResTy = [this](const HLSLAttributedResourceType *ResTy) -> bool {
+    auto checkResTy = [&](const HLSLAttributedResourceType *ResTy) -> bool {
       bool IsValid = false;
-      const ASTContext &AST = SemaRef.getASTContext();
-      // The resource handle must be either
-      // RWByteAddressBuffer or RWStructuredBuffer
-      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
-                 ResTy->getAttrs().RawBuffer && ResTy->hasContainedType();
-      // RWBuffer<int> or RWBuffer<uint>
-      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
-                 !ResTy->getAttrs().RawBuffer && ResTy->hasContainedType() &&
-                 (ResTy->getContainedType() == AST.IntTy ||
-                  ResTy->getContainedType() == AST.UnsignedIntTy);
-      // TODO: Handle Texture types when implemented
-      return !IsValid;
-    };
-    if (CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy))
-      return true;
+      const bool IsUAV = ResTy->getAttrs().ResourceClass == ResourceClass::UAV;
+      const bool HasElemTy = ResTy->hasContainedType();
+      const bool IsRaw = ResTy->isRaw();
+      const bool IsTexture = ResTy->isTexture();
+      const bool IsIntElem =
+          HasElemTy && (ResTy->getContainedType() == AST.IntTy ||
+                        ResTy->getContainedType() == AST.UnsignedIntTy);
+      const bool IsLongElem =
+          HasElemTy && (ResTy->getContainedType() == AST.LongTy ||
+                        ResTy->getContainedType() == AST.UnsignedLongTy);
 
-    if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1),
-                            SemaRef.getASTContext().UnsignedIntTy) ||
-        CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
-                            SemaRef.getASTContext().UnsignedIntTy) ||
-        CheckArgTypeMatches(&SemaRef, TheCall->getArg(3),
-                            SemaRef.getASTContext().UnsignedIntTy))
-      return true;
-    // We will have a second index if handling a RWStructuredBuffer
-    if (TheCall->getNumArgs() == 5)
-      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(4),
-                              SemaRef.getASTContext().UnsignedIntTy))
-        return true;
-    break;
-  }
-  case Builtin::BI__builtin_hlsl_interlocked_or64: {
-    if (SemaRef.checkArgCountRange(TheCall, 3, 4))
-      return true;
-    if (CheckShaderModelVersion(&SemaRef, TheCall, VersionTuple(6, 6)))
-      return true;
-    auto checkResTy = [this](const HLSLAttributedResourceType *ResTy) -> bool {
-      bool IsValid = false;
-      const ASTContext &AST = SemaRef.getASTContext();
       // The resource handle must be either
-      // RWByteAddressBuffer or RWStructuredBuffer
-      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
-                 ResTy->getAttrs().RawBuffer && ResTy->hasContainedType();
-      // RWBuffer<int> or RWBuffer<uint>
-      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
-                 !ResTy->getAttrs().RawBuffer && ResTy->hasContainedType() &&
-                 (ResTy->getContainedType() == AST.LongTy ||
-                  ResTy->getContainedType() == AST.UnsignedLongTy);
-      // TODO: Handle Texture types when implemented
+      IsValid =
+          IsUAV &&
+          ((IsRaw && HasElemTy) || // RWByteAddressBuffer or RWStructuredBuffer
+           (!IsRaw && HasElemTy &&
+            (IsIntElem || IsLongElem)) || // RWBuffer<int/uint/long/ulong>
+           (!IsRaw && HasElemTy && IsTexture &&
+            (IsIntElem || IsLongElem))); // RWTexture<int/uint/long/ulong>
+
       return !IsValid;
     };
     if (CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy))
       return true;
+    // 64-bit interlocked operations are only valid in SM6.6+.
+    // Check the final arg (which will be the new value) for the size used.
+    bool Is64Bit =
+        AST.getTypeSize(
+            TheCall->getArg(TheCall->getNumArgs() - 1)->getType()) == 64;
+    if (Is64Bit)
+      if (CheckShaderModelVersion(&SemaRef, TheCall, VersionTuple(6, 6)))
+        return true;
+
+    llvm::SmallVector<QualType> LegalTypes;
+    if (Is64Bit) {
+      LegalTypes = {AST.IntTy, AST.UnsignedIntTy, AST.LongTy,
+                    AST.UnsignedLongTy};
+    } else {
+      LegalTypes = {AST.IntTy, AST.UnsignedIntTy};
+    }
 
     if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1),
                             SemaRef.getASTContext().UnsignedIntTy))
@@ -4127,57 +4094,79 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     if (TheCall->getNumArgs() == 4) {
       if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
                               SemaRef.getASTContext().UnsignedIntTy) ||
-          CheckArgTypeMatches(&SemaRef, TheCall->getArg(3),
-                              SemaRef.getASTContext().UnsignedLongTy))
+          CheckArgTypeMatchesList(&SemaRef, TheCall->getArg(3), LegalTypes))
         return true;
     } else {
-      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
-                              SemaRef.getASTContext().UnsignedLongTy))
+      if (CheckArgTypeMatchesList(&SemaRef, TheCall->getArg(2), LegalTypes))
         return true;
     }
 
     TheCall->setType(SemaRef.getASTContext().VoidTy);
     break;
   }
-  case Builtin::BI__builtin_hlsl_interlocked_or_ret64_longlong:
-  case Builtin::BI__builtin_hlsl_interlocked_or_ret64_ulonglong: {
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret_int:
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret_uint:
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret_ll:
+  case Builtin::BI__builtin_hlsl_interlocked_or_ret_ull: {
     if (SemaRef.checkArgCountRange(TheCall, 4, 5))
       return true;
-    if (CheckShaderModelVersion(&SemaRef, TheCall, VersionTuple(6, 6)))
-      return true;
-    auto checkResTy = [this](const HLSLAttributedResourceType *ResTy) -> bool {
+    const ASTContext &AST = SemaRef.getASTContext();
+    auto checkResTy = [&](const HLSLAttributedResourceType *ResTy) -> bool {
       bool IsValid = false;
-      const ASTContext &AST = SemaRef.getASTContext();
+      const bool IsUAV = ResTy->getAttrs().ResourceClass == ResourceClass::UAV;
+      const bool HasElemTy = ResTy->hasContainedType();
+      const bool IsRaw = ResTy->isRaw();
+      const bool IsTexture = ResTy->isTexture();
+      const bool IsIntElem =
+          HasElemTy && (ResTy->getContainedType() == AST.IntTy ||
+                        ResTy->getContainedType() == AST.UnsignedIntTy);
+      const bool IsLongElem =
+          HasElemTy && (ResTy->getContainedType() == AST.LongTy ||
+                        ResTy->getContainedType() == AST.UnsignedLongTy);
+
       // The resource handle must be either
-      // RWByteAddressBuffer or RWStructuredBuffer
-      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
-                 ResTy->getAttrs().RawBuffer && ResTy->hasContainedType();
-      // RWBuffer<int> or RWBuffer<uint>
-      IsValid |= ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
-                 !ResTy->getAttrs().RawBuffer && ResTy->hasContainedType() &&
-                 (ResTy->getContainedType() == AST.LongTy ||
-                  ResTy->getContainedType() == AST.UnsignedLongTy);
-      // TODO: Handle Texture types when implemented
+      IsValid =
+          IsUAV &&
+          ((IsRaw && HasElemTy) || // RWByteAddressBuffer or RWStructuredBuffer
+           (!IsRaw && HasElemTy &&
+            (IsIntElem || IsLongElem)) || // RWBuffer<int/uint/long/ulong>
+           (!IsRaw && HasElemTy && IsTexture &&
+            (IsIntElem || IsLongElem))); // RWTexture<int/uint/long/ulong>
+
       return !IsValid;
     };
     if (CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy))
       return true;
+    // 64-bit interlocked operations are only valid in SM6.6+.
+    // Use the final arg (the out-param receiving the original value) for the width.
+    bool Is64Bit =
+        AST.getTypeSize(
+            TheCall->getArg(TheCall->getNumArgs() - 1)->getType()) == 64;
+    if (Is64Bit)
+      if (CheckShaderModelVersion(&SemaRef, TheCall, VersionTuple(6, 6)))
+        return true;
+
+    llvm::SmallVector<QualType> LegalTypes;
+    if (Is64Bit) {
+      LegalTypes = {AST.IntTy, AST.UnsignedIntTy, AST.LongTy,
+                    AST.UnsignedLongTy};
+    } else {
+      LegalTypes = {AST.IntTy, AST.UnsignedIntTy};
+    }
 
     if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1),
-                            SemaRef.getASTContext().UnsignedIntTy) ||
-        CheckArgTypeMatches(&SemaRef, TheCall->getArg(3),
-                            SemaRef.getASTContext().UnsignedLongTy))
+                            SemaRef.getASTContext().UnsignedIntTy))
       return true;
     // We will have a second index if handling a RWStructuredBuffer
     if (TheCall->getNumArgs() == 5) {
       if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
                               SemaRef.getASTContext().UnsignedIntTy) ||
-          CheckArgTypeMatches(&SemaRef, TheCall->getArg(4),
-                              SemaRef.getASTContext().UnsignedLongTy))
+          CheckArgTypeMatchesList(&SemaRef, TheCall->getArg(3), LegalTypes) ||
+          CheckArgTypeMatchesList(&SemaRef, TheCall->getArg(4), LegalTypes))
         return true;
     } else {
-      if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2),
-                              SemaRef.getASTContext().UnsignedLongTy))
+      if (CheckArgTypeMatchesList(&SemaRef, TheCall->getArg(2), LegalTypes) ||
+          CheckArgTypeMatchesList(&SemaRef, TheCall->getArg(3), LegalTypes))
         return true;
     }
     break;
diff --git a/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
index b43666c89a45f..3bcd4d76948ad 100644
--- a/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/Interlocked-or-builtin.hlsl
@@ -5,16 +5,36 @@
 // RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.6-library %s \
 // RUN:  -emit-llvm -disable-llvm-passes -o - -DUINTBUF | FileCheck %s --check-prefixes=CHECK-UINTBUF
 // RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.6-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DLONGBUF | FileCheck %s --check-prefixes=CHECK-LONGBUF
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.6-library %s \
+// RUN:  -emit-llvm -disable-llvm-passes -o - -DULONGBUF | FileCheck %s --check-prefixes=CHECK-ULONGBUF
+// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.6-library %s \
 // RUN:  -emit-llvm -disable-llvm-passes -o - -DSTRUCTURED | FileCheck %s --check-prefixes=CHECK-STRUCTURED
 
 #ifdef BYTEADDRESS
 using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]] [[hlsl::contained_type(char)]];
+using ret_ty = unsigned int;
+#define INVAL 0u
 #endif
 #ifdef INTBUF
 using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(int)]];
+using ret_ty = int;
+#define INVAL 0
 #endif
 #ifdef UINTBUF
 using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(unsigned int)]];
+using ret_ty = unsigned int;
+#define INVAL 0u
+#endif
+#ifdef LONGBUF
+using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(long)]];
+using ret_ty = long;
+#define INVAL 0l
+#endif
+#ifdef ULONGBUF
+using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(unsigned long)]];
+using ret_ty = unsigned long;
+#define INVAL 0ul
 #endif
 #ifdef STRUCTURED
 struct TestStruct {
@@ -35,12 +55,14 @@ struct CustomResource {
 // CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
 // CHECK-INTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer", i32, 1, 0, 1) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
 // CHECK-UINTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer", i32, 1, 0, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
+// CHECK-LONGBUF: %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_1t.i64(target("dx.TypedBuffer", i64, 1, 0, 1) {{%.*}}, i32 1, i32 poison, i32 poison, i64 0)
+// CHECK-ULONGBUF: %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_0t.i64(target("dx.TypedBuffer", i64, 1, 0, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i64 0)
 // CHECK-NEXT: store i32 %hlsl.interlocked.or, ptr [[returnVal:%.*]], align 4
 // CHECK-NEXT: [[loadedReturnVal:%.*]] = load i32, ptr [[returnVal]], align 4
 // CHECK-NEXT: ret i32 [[loadedReturnVal]]
-unsigned int test_return(CustomResource cr) {
-  unsigned int returnVal = 0u;
-  __builtin_hlsl_interlocked_or_ret(cr.h, 1u, 0u, returnVal);
+ret_ty test_return(CustomResource cr) {
+  ret_ty returnVal = 0;
+  __builtin_hlsl_interlocked_or_ret_uint(cr.h, 1u, INVAL, returnVal);
   return returnVal;
 }
 
@@ -48,9 +70,11 @@ unsigned int test_return(CustomResource cr) {
 // CHECK-BYTEADDRESS: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
 // CHECK-INTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer", i32, 1, 0, 1) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
 // CHECK-UINTBUF: %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer", i32, 1, 0, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i32 0)
+// CHECK-LONGBUF: %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_1t.i64(target("dx.TypedBuffer", i64, 1, 0, 1) {{%.*}}, i32 1, i32 poison, i32 poison, i64 0)
+// CHECK-ULONGBUF: %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_0t.i64(target("dx.TypedBuffer", i64, 1, 0, 0) {{%.*}}, i32 1, i32 poison, i32 poison, i64 0)
 // CHECK-NEXT: ret void
 void test_no_return(CustomResource h) {
-  __builtin_hlsl_interlocked_or(h.h, 1u, 0u);
+  __builtin_hlsl_interlocked_or(h.h, 1u, INVAL);
 }
 
 #else
@@ -62,7 +86,7 @@ void test_no_return(CustomResource h) {
 // CHECK-STRUCTURED-NEXT: ret i32 [[loadedReturnVal]]
 unsigned int test_return(CustomResource cr) {
   unsigned int returnVal = 0u;
-  __builtin_hlsl_interlocked_or_ret(cr.h, 1u, 4u, 0u, returnVal);
+  __builtin_hlsl_interlocked_or_ret_uint(cr.h, 1u, 4u, 0u, returnVal);
   return returnVal;
 }
 
diff --git a/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl b/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
index b6611ff0800ae..f674ec57cbdca 100644
--- a/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/Interlocked-or.hlsl
@@ -1,98 +1,152 @@
-// RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.0-library %s \
-// RUN:  -emit-llvm -disable-llvm-passes -o - -DINTERLOCKED32 | \
-// RUN:  FileCheck %s --check-prefixes=CHECK-32
 // RUN: %clang_cc1 -finclude-default-header  -x hlsl  -triple dxil-pc-shadermodel6.6-library %s \
-// RUN:  -emit-llvm -disable-llvm-passes -o - -DINTERLOCKED64 | \
-// RUN:  FileCheck %s --check-prefixes=CHECK-64
+// RUN:  -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK
 
 RWByteAddressBuffer buf: register(u0);
 
 // CHECK: %"class.hlsl::RWByteAddressBuffer" = type { target("dx.RawBuffer", i8, 1, 0) }
 
-#ifdef INTERLOCKED32
-
-// CHECK-32-LABEL: define {{.*}} @_Z11test_return
-// CHECK-32: call void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjjRj
-// CHECK-32: ret i32 {{%.*}}
+// CHECK-LABEL: define {{.*}} @_Z11test_return
+// CHECK: call void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjjRj
+// CHECK: ret i32 {{%.*}}
 uint test_return() {
   uint returnVal;
-  buf.InterlockedOr(0, 0, returnVal);
+  buf.InterlockedOr(0, 0u, returnVal);
   return returnVal;
 }
 
-// CHECK-32-LABEL: define {{.*}} @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjjRj(
-// CHECK-32: [[this_addr:%.*]] = alloca ptr
-// CHECK-32: [[original_val:%.*]] = alloca ptr
-// CHECK-32: [[this:%.*]] = load ptr, ptr [[this_addr]]
-// CHECK-32: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
-// CHECK-32: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
-// CHECK-32: [[dest:%.*]] = load i32, ptr %dest.addr
-// CHECK-32: [[newval:%.*]] = load i32, ptr %value.addr
-// CHECK-32: [[result:%.*]] = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i32 [[newval]])
-// CHECK-32: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
-// CHECK-32: store i32 [[result]], ptr [[loaded_orig_val_ptr]]
-
-// CHECK-32-LABEL: define {{.*}} @_Z14test_no_return
-// CHECK-32: call void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjj
-// CHECK-32: ret void
+// CHECK-LABEL: define {{.*}} @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjjRj(
+// CHECK: [[this_addr:%.*]] = alloca ptr
+// CHECK: [[original_val:%.*]] = alloca ptr
+// CHECK: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
+// CHECK: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK: [[newval:%.*]] = load i32, ptr %value.addr
+// CHECK: [[result:%.*]] = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i32 [[newval]])
+// CHECK: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
+// CHECK: store i32 [[result]], ptr [[loaded_orig_val_ptr]]
+
+// CHECK-LABEL: define {{.*}} @_Z12test_returnS
+// CHECK: call void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjiRi
+// CHECK: ret i32 {{%.*}}
+int test_returnS() {
+  int returnValS;
+  buf.InterlockedOr(0, 0, returnValS);
+  return returnValS;
+}
+
+// CHECK-LABEL: define {{.*}} @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjiRi(
+// CHECK: [[this_addr:%.*]] = alloca ptr
+// CHECK: [[original_val:%.*]] = alloca ptr
+// CHECK: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
+// CHECK: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK: [[newval:%.*]] = load i32, ptr %value.addr
+// CHECK: [[result:%.*]] = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i32 [[newval]])
+// CHECK: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
+// CHECK: store i32 [[result]], ptr [[loaded_orig_val_ptr]]
+
+// CHECK-LABEL: define {{.*}} @_Z14test_no_return
+// CHECK: call void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjj
+// CHECK: call void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEji
+// CHECK: ret void
 void test_no_return() {
+  buf.InterlockedOr(0, 0u);
   buf.InterlockedOr(0, 0);
 }
 
-// CHECK-32-LABEL: define {{.*}} void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjj(
-// CHECK-32: [[this_addr:%.*]] = alloca ptr
-// CHECK-32: [[this:%.*]] = load ptr, ptr [[this_addr]]
-// CHECK-32: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
-// CHECK-32: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
-// CHECK-32: [[dest:%.*]] = load i32, ptr %dest.addr
-// CHECK-32: [[newval:%.*]] = load i32, ptr %value.addr
-// CHECK-32: {{%.*}} = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i32 [[newval]])
-// CHECK-32: ret void
-
-// CHECK-32: declare i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0), i32, i32, i32, i32)
-
-#endif
+// CHECK-LABEL: define {{.*}} void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEjj(
+// CHECK: [[this_addr:%.*]] = alloca ptr
+// CHECK: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
+// CHECK: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK: [[newval:%.*]] = load i32, ptr %value.addr
+// CHECK: {{%.*}} = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i32 [[newval]])
+// CHECK: ret void
 
-#ifdef INTERLOCKED64
+// CHECK-LABEL: define {{.*}} void @_ZN4hlsl19RWByteAddressBuffer13InterlockedOrEji(
+// CHECK: [[this_addr:%.*]] = alloca ptr
+// CHECK: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
+// CHECK: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK: [[newval:%.*]] = load i32, ptr %value.addr
+// CHECK: {{%.*}} = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i32 [[newval]])
+// CHECK: ret void
 
 // CHECK-LABEL: define {{.*}} @_Z13test_return64
 // CHECK: call void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64EjmRm
 // CHECK: ret i64 {{%.*}}
 uint64_t test_return64() {
   uint64_t returnVal;
-  buf.InterlockedOr64(0, 0, returnVal);
+  buf.InterlockedOr64(0, 0ul, returnVal);
   return returnVal;
 }
 
-// CHECK-64-LABEL: define {{.*}} void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64EjmRm(
-// CHECK-64: [[this_addr:%.*]] = alloca ptr
-// CHECK-64: [[original_val:%.*]] = alloca ptr
-// CHECK-64: [[this:%.*]] = load ptr, ptr [[this_addr]]
-// CHECK-64: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
-// CHECK-64: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
-// CHECK-64: [[dest:%.*]] = load i32, ptr %dest.addr
-// CHECK-64: [[newval:%.*]] = load i64, ptr %value.addr
-// CHECK-64: [[result:%.*]] = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i64 [[newval]])
-// CHECK-64: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
-// CHECK-64: store i64 [[result]], ptr [[loaded_orig_val_ptr]]
+// CHECK-LABEL: define {{.*}} void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64EjmRm(
+// CHECK: [[this_addr:%.*]] = alloca ptr
+// CHECK: [[original_val:%.*]] = alloca ptr
+// CHECK: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
+// CHECK: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK: [[newval:%.*]] = load i64, ptr %value.addr
+// CHECK: [[result:%.*]] = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i64 [[newval]])
+// CHECK: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
+// CHECK: store i64 [[result]], ptr [[loaded_orig_val_ptr]]
+
+// CHECK-LABEL: define {{.*}} @_Z14test_return64S
+// CHECK: call void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64EjlRl
+// CHECK: ret i64 {{%.*}}
+int64_t test_return64S() {
+  int64_t returnValS;
+  buf.InterlockedOr64(0, 0l, returnValS);
+  return returnValS;
+}
+
+// CHECK-LABEL: define {{.*}} void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64EjlRl(
+// CHECK: [[this_addr:%.*]] = alloca ptr
+// CHECK: [[original_val:%.*]] = alloca ptr
+// CHECK: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
+// CHECK: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK: [[newval:%.*]] = load i64, ptr %value.addr
+// CHECK: [[result:%.*]] = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i64 [[newval]])
+// CHECK: [[loaded_orig_val_ptr:%.*]] = load ptr, ptr [[original_val]]
+// CHECK: store i64 [[result]], ptr [[loaded_orig_val_ptr]]
 
 // CHECK-LABEL: define {{.*}} @_Z16test_no_return64
 // CHECK: call void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64Ejm
+// CHECK: call void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64Ejl
 // CHECK: ret void
 void test_no_return64() {
-  buf.InterlockedOr64(0, 0);
+  buf.InterlockedOr64(0, 0ul);
+  buf.InterlockedOr64(0, 0l);
 }
 
-// CHECK-64-LABEL: define {{.*}} void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64Ejm(
-// CHECK-64: [[this_addr:%.*]] = alloca ptr
-// CHECK-64: [[this:%.*]] = load ptr, ptr [[this_addr]]
-// CHECK-64: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
-// CHECK-64: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
-// CHECK-64: [[dest:%.*]] = load i32, ptr %dest.addr
-// CHECK-64: [[newval:%.*]] = load i64, ptr %value.addr
-// CHECK-64: {{.*}} = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i64 [[newval]])
-// CHECK-64: ret void
+// CHECK-LABEL: define {{.*}} void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64Ejm(
+// CHECK: [[this_addr:%.*]] = alloca ptr
+// CHECK: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
+// CHECK: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK: [[newval:%.*]] = load i64, ptr %value.addr
+// CHECK: {{.*}} = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i64 [[newval]])
+// CHECK: ret void
+
+// CHECK-LABEL: define {{.*}} void @_ZN4hlsl19RWByteAddressBuffer15InterlockedOr64Ejl(
+// CHECK: [[this_addr:%.*]] = alloca ptr
+// CHECK: [[this:%.*]] = load ptr, ptr [[this_addr]]
+// CHECK: [[handle:%.*]] = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr [[this]], i32 0, i32 0
+// CHECK: [[buf:%.*]] = load target("dx.RawBuffer", i8, 1, 0), ptr [[handle]]
+// CHECK: [[dest:%.*]] = load i32, ptr %dest.addr
+// CHECK: [[newval:%.*]] = load i64, ptr %value.addr
+// CHECK: {{.*}} = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0) [[buf]], i32 [[dest]], i32 poison, i32 poison, i64 [[newval]])
+// CHECK: ret void
 
-// CHECK-64: declare i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0), i32, i32, i32, i64)
+// CHECK: declare i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0), i32, i32, i32, i32)
 
-#endif
+// CHECK: declare i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t.i64(target("dx.RawBuffer", i8, 1, 0), i32, i32, i32, i64)
diff --git a/clang/test/SemaHLSL/BuiltIns/interlocked-or-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/interlocked-or-errors.hlsl
index 2c9da9b840297..bc334853e824d 100644
--- a/clang/test/SemaHLSL/BuiltIns/interlocked-or-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/interlocked-or-errors.hlsl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library %s -verify
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -verify
 
 void no_arg() {
   __builtin_hlsl_interlocked_or();
@@ -16,30 +16,30 @@ void non_resource_arg() {
 }
 
 void ret_no_arg() {
-  __builtin_hlsl_interlocked_or_ret();
+  __builtin_hlsl_interlocked_or_ret_uint();
   // expected-error at -1 {{too few arguments to function call, expected 4, have 0}}
 }
 
 void ret_too_many_args() {
-  __builtin_hlsl_interlocked_or_ret(0, 0, 0, 0, 0, 0);
+  __builtin_hlsl_interlocked_or_ret_uint(0, 0, 0, 0, 0, 0);
   // expected-error at -1 {{too many arguments to function call, expected at most 5, have 6}}
 }
 
 void ret_non_resource_arg() {
-  __builtin_hlsl_interlocked_or_ret(0, 0, 0, 0);
+  __builtin_hlsl_interlocked_or_ret_uint(0, 0, 0, 0);
   // expected-error at -1 {{used type 'int' where __hlsl_resource_t is required}}
 }
 
 // ByteAddressBuffer
 using handle_char_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::raw_buffer]] [[hlsl::contained_type(char)]];
 // Buffer<int>
-using handle_int_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::contained_type(int)]];
+using handle_int_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::contained_type(float)]];
 // RWBuffer<float>
 using handle_float_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float)]];
 
 struct CustomResource {
   handle_char_t ByteAddressBufferChar;
-  handle_int_t BufferInt;
+  handle_int_t BufferFloat;
   handle_float_t RWBufferFloat;
 };
 
@@ -49,36 +49,26 @@ void invalid_byte_address_buffer(CustomResource CR) {
 }
 
 void invalid_typed_buffer(CustomResource CR) {
-  __builtin_hlsl_interlocked_or(CR.BufferInt, 0, 0);
+  __builtin_hlsl_interlocked_or(CR.BufferFloat, 0u, 0u);
   // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
 }
 
 void invalid_rw_typed_buffer(CustomResource CR) {
-  __builtin_hlsl_interlocked_or(CR.RWBufferFloat, 0, 0);
+  __builtin_hlsl_interlocked_or(CR.RWBufferFloat, 0u, 0);
   // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
 }
 
 void ret_invalid_byte_address_buffer(CustomResource CR) {
-  __builtin_hlsl_interlocked_or_ret(CR.ByteAddressBufferChar, 0, 0, 0);
+  __builtin_hlsl_interlocked_or_ret_uint(CR.ByteAddressBufferChar, 0u, 0, 0);
   // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
 }
 
 void ret_invalid_typed_buffer(CustomResource CR) {
-  __builtin_hlsl_interlocked_or_ret(CR.BufferInt, 0, 0, 0);
+  __builtin_hlsl_interlocked_or_ret_uint(CR.BufferFloat, 0u, 0, 0);
   // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
 }
 
 void ret_invalid_rw_typed_buffer(CustomResource CR) {
-  __builtin_hlsl_interlocked_or_ret(CR.RWBufferFloat, 0, 0, 0);
+  __builtin_hlsl_interlocked_or_ret_uint(CR.RWBufferFloat, 0u, 0, 0);
   // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
 }
-
-void wrong_shader_model() {
-  __builtin_hlsl_interlocked_or64(0, 0, 0, 0);
-  // expected-error at -1 {{intrinsic '__builtin_hlsl_interlocked_or64(0, 0, 0, 0)' requires shader model 6.6 or greater}}
-}
-
-void ret_wrong_shader_model() {
-  __builtin_hlsl_interlocked_or_ret64(0, 0, 0, 0);
-  // expected-error at -1 {{intrinsic '__builtin_hlsl_interlocked_or_ret64(0, 0, 0, 0)' requires shader model 6.6 or greater}}
-}
diff --git a/clang/test/SemaHLSL/BuiltIns/interlocked-or64-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/interlocked-or64-errors.hlsl
index 5a2a0ea2b23af..6f3327ba40c38 100644
--- a/clang/test/SemaHLSL/BuiltIns/interlocked-or64-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/interlocked-or64-errors.hlsl
@@ -1,74 +1,18 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -verify
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library %s -verify
 
-void no_arg() {
-  __builtin_hlsl_interlocked_or64();
-  // expected-error at -1 {{too few arguments to function call, expected 3, have 0}}
-}
-
-void too_many_args() {
-  __builtin_hlsl_interlocked_or64(0, 0, 0, 0, 0);
-  // expected-error at -1 {{too many arguments to function call, expected at most 4, have 5}}
-}
-
-void non_resource_arg() {
-  __builtin_hlsl_interlocked_or64(0, 0, 0);
-  // expected-error at -1 {{used type 'int' where __hlsl_resource_t is required}}
-}
-
-void ret_no_arg() {
-  __builtin_hlsl_interlocked_or_ret64();
-  // expected-error at -1 {{too few arguments to function call, expected 4, have 0}}
-}
-
-void ret_too_many_args() {
-  __builtin_hlsl_interlocked_or_ret64(0, 0, 0, 0, 0, 0);
-  // expected-error at -1 {{too many arguments to function call, expected at most 5, have 6}}
-}
-
-void ret_non_resource_arg() {
-  __builtin_hlsl_interlocked_or_ret64(0, 0, 0, 0);
-  // expected-error at -1 {{used type 'int' where __hlsl_resource_t is required}}
-}
-
-// ByteAddressBuffer
-using handle_char_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::raw_buffer]] [[hlsl::contained_type(char)]];
-// Buffer<int>
-using handle_int_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::contained_type(int)]];
-// RWBuffer<float>
-using handle_float_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float)]];
+using handle_long_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(long)]];
 
 struct CustomResource {
-  handle_char_t ByteAddressBufferChar;
-  handle_int_t BufferInt;
-  handle_float_t RWBufferFloat;
+  handle_long_t BufferLong;
 };
 
-void invalid_byte_address_buffer(CustomResource CR) {
-  __builtin_hlsl_interlocked_or64(CR.ByteAddressBufferChar, 0, 0);
-  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
-}
-
-void invalid_typed_buffer(CustomResource CR) {
-  __builtin_hlsl_interlocked_or64(CR.BufferInt, 0, 0);
-  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
-}
-
-void invalid_rw_typed_buffer(CustomResource CR) {
-  __builtin_hlsl_interlocked_or64(CR.RWBufferFloat, 0, 0);
-  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
-}
-
-void ret_invalid_byte_address_buffer(CustomResource CR) {
-  __builtin_hlsl_interlocked_or_ret64(CR.ByteAddressBufferChar, 0, 0, 0);
-  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
-}
-
-void ret_invalid_typed_buffer(CustomResource CR) {
-  __builtin_hlsl_interlocked_or_ret64(CR.BufferInt, 0, 0, 0);
-  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+void wrong_shader_model(CustomResource CR) {
+  __builtin_hlsl_interlocked_or(CR.BufferLong, 0u, 0l);
+  // expected-error at -1 {{intrinsic '__builtin_hlsl_interlocked_or(CR.BufferLong, 0U, 0L)' requires shader model 6.6 or greater}}
 }
 
-void ret_invalid_rw_typed_buffer(CustomResource CR) {
-  __builtin_hlsl_interlocked_or_ret64(CR.RWBufferFloat, 0, 0, 0);
-  // expected-error at -1 {{invalid __hlsl_resource_t type attributes}}
+void ret_wrong_shader_model(CustomResource CR) {
+  long ret;
+  __builtin_hlsl_interlocked_or_ret_ll(CR.BufferLong, 0u, 0l, ret);
+  // expected-error at -1 {{intrinsic '__builtin_hlsl_interlocked_or_ret_ll(CR.BufferLong, 0U, 0L, ret)' requires shader model 6.6 or greater}}
 }

>From 8b0a1e7e97f05f74ca4aceab910d7677e65316a2 Mon Sep 17 00:00:00 2001
From: Alexander Johnston <alexander.johnston at amd.com>
Date: Tue, 3 Mar 2026 23:18:43 +0000
Subject: [PATCH 5/5] Update interlocked_or test in DXIL backend with poison
 vals

---
 llvm/test/CodeGen/DirectX/interlocked-or.ll | 32 ++++++++++-----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/llvm/test/CodeGen/DirectX/interlocked-or.ll b/llvm/test/CodeGen/DirectX/interlocked-or.ll
index ef03d7c67b758..7726a546f240f 100644
--- a/llvm/test/CodeGen/DirectX/interlocked-or.ll
+++ b/llvm/test/CodeGen/DirectX/interlocked-or.ll
@@ -9,7 +9,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 0, i32 poison, i32 poison, i32 0)
   %hlsl.interlocked.or = tail call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 0, i32 poison, i32 poison, i32 0)
   ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
   store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
@@ -24,7 +24,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i32 0)
   %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_i8_1_0t.i32(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: ret void
   ret void
@@ -39,7 +39,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 poison, i32 0)
   %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_s_struct.TestStructs_1_0t.i32(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %buffer, i32 1, i32 4, i32 poison, i32 0)
   ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
   store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
@@ -54,7 +54,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", %struct.TestStruct, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStructs_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 4, i32 poison, i32 0)
   %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.RawBuffer_s_struct.TestStructs_1_0t.i32(target("dx.RawBuffer", %struct.TestStruct, 1, 0) %buffer,  i32 1, i32 4, i32 poison, i32 0)
   ; CHECK: ret void
   ret void
@@ -67,7 +67,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i32 0)
   %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer", i32, 1, 0, 1) %buffer,  i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
   store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
@@ -82,7 +82,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i32, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i32 0)
   %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_1t.i32(target("dx.TypedBuffer", i32, 1, 0, 1) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: ret void
   ret void
@@ -95,7 +95,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i32, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i32 0)
   %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer", i32, 1, 0, 0) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: store i32 [[INTERLOCKED]], ptr [[RETURN]]
   store i32 %hlsl.interlocked.or, ptr %returnVal, align 4
@@ -110,7 +110,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i32, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i32_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i32 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i32 0)
   %hlsl.interlocked.or = call i32 @llvm.dx.interlocked.or.i32.tdx.TypedBuffer_i32_1_0_0t.i32(target("dx.TypedBuffer", i32, 1, 0, 0) %buffer, i32 1, i32 poison, i32 poison, i32 0)
   ; CHECK: ret void
   ret void
@@ -123,7 +123,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i64 0)
   %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 1, i32 poison, i32 poison, i64 0)
   ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
   store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
@@ -138,7 +138,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i64 0)
   %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_i8_1_0t(target("dx.RawBuffer", i8, 1, 0) %buffer, i32 1, i32 poison, i32 poison, i64 0)
   ; CHECK: ret void
   ret void
@@ -153,7 +153,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", %struct.TestStruct64, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStruct64s_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 undef, i64 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 poison, i64 0)
   %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_s_struct.TestStruct64s_1_0t(target("dx.RawBuffer", %struct.TestStruct64, 1, 0) %buffer, i32 1, i32 8, i32 poison, i64 0)
   ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
   store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
@@ -168,7 +168,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.RawBuffer", %struct.TestStruct64, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.TestStruct64s_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 undef, i64 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 8, i32 poison, i64 0)
   %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.RawBuffer_s_struct.TestStruct64s_1_0t(target("dx.RawBuffer", %struct.TestStruct64, 1, 0) %buffer, i32 1, i32 8, i32 poison, i64 0)
   ; CHECK: ret void
   ret void
@@ -181,7 +181,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i64 0)
   %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_1t(target("dx.TypedBuffer", i64, 1, 0, 1) %buffer, i32 1, i32 poison, i32 poison, i64 0)
   ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
   store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
@@ -196,7 +196,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i64, 1, 0, 1) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i64 0)
   %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_1t(target("dx.TypedBuffer", i64, 1, 0, 1) %buffer, i32 1, i32 poison, i32 poison, i64 0)
   ; CHECK: ret void
   ret void
@@ -209,7 +209,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i64 0)
   %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 1, i32 poison, i32 poison, i64 0)
   ; CHECK: store i64 [[INTERLOCKED]], ptr [[RETURN]]
   store i64 %hlsl.interlocked.or, ptr %returnVal, align 8
@@ -224,7 +224,7 @@ entry:
   ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217
   ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
   %buffer = call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
-  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 undef, i32 undef, i64 0)
+  ; CHECK: [[INTERLOCKED:%.*]] = call i64 @dx.op.atomicBinOp.i64(i32 78, %dx.types.Handle [[HANDLE]], i32 2, i32 1, i32 poison, i32 poison, i64 0)
   %hlsl.interlocked.or = call i64 @llvm.dx.interlocked.or.i64.tdx.TypedBuffer_i64_1_0_0t(target("dx.TypedBuffer", i64, 1, 0, 0) %buffer, i32 1, i32 poison, i32 poison, i64 0)
   ; CHECK: ret void
   ret void



More information about the cfe-commits mailing list