[llvm] [DXIL] Define and generate `DXILAttribute` and `DXILProperty` (PR #117072)

Mon Dec 16 14:16:21 PST 2024

https://github.com/inbelic updated https://github.com/llvm/llvm-project/pull/117072

>From b7158254479d765fca97ef1b54876bcf4925ab2d Mon Sep 17 00:00:00 2001
From: Finn Plummer <canadienfinn at gmail.com>
Date: Thu, 14 Nov 2024 19:40:43 +0000
Subject: [PATCH 01/12] [DXIL] Define DXILAttribute

- switch to using DXILAttribute to only denote function attributes

- attribute enums can't be or'd together as is currently implemented, so
  we switch to using a list of attributes in DXILOpBuilder.cpp and
  DXILEmitter.cpp
---
 llvm/lib/Target/DirectX/DXIL.td           | 16 +++----
 llvm/lib/Target/DirectX/DXILConstants.h   |  5 +++
 llvm/lib/Target/DirectX/DXILOpBuilder.cpp | 28 +++++++++++-
 llvm/utils/TableGen/DXILEmitter.cpp       | 54 +++++++++++++----------
 4 files changed, 69 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index cff6cdce813ded..f6a0af4f8fa30e 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -266,18 +266,18 @@ def miss : DXILShaderStage;
 def all_stages : DXILShaderStage;
 // Denote support for DXIL Op to have been removed
 def removed : DXILShaderStage;
+
 // DXIL Op attributes
 
+// A function attribute denotes that there is a corresponding LLVM function
+// attribute that will be set when building the DXIL op. The mapping for
+// non-trivial cases is defined by setDXILAttribute in DXILOpBuilder.cpp
 class DXILAttribute;
 
-def ReadOnly : DXILAttribute;
 def ReadNone : DXILAttribute;
-def IsDerivative : DXILAttribute;
-def IsGradient : DXILAttribute;
-def IsFeedback : DXILAttribute;
-def IsWave : DXILAttribute;
-def NeedsUniformInputs : DXILAttribute;
-def IsBarrier : DXILAttribute;
+def ReadOnly : DXILAttribute;
+def NoDuplicate : DXILAttribute;
+def NoReturn : DXILAttribute;
 
 class Overloads<Version ver, list<DXILOpParamType> ols> {
   Version dxil_version = ver;
@@ -291,7 +291,7 @@ class Stages<Version ver, list<DXILShaderStage> st> {
 
 class Attributes<Version ver = DXIL1_0, list<DXILAttribute> attrs> {
   Version dxil_version = ver;
-  list<DXILAttribute> op_attrs = attrs;
+  list<DXILAttribute> fn_attrs = attrs;
 }
 
 defvar BarrierMode_DeviceMemoryBarrier              = 2;
diff --git a/llvm/lib/Target/DirectX/DXILConstants.h b/llvm/lib/Target/DirectX/DXILConstants.h
index 022cd57795a063..d93892f27a0d29 100644
--- a/llvm/lib/Target/DirectX/DXILConstants.h
+++ b/llvm/lib/Target/DirectX/DXILConstants.h
@@ -30,6 +30,11 @@ enum class OpParamType : unsigned {
 #include "DXILOperation.inc"
 };
 
+enum class Attribute : unsigned {
+#define DXIL_ATTRIBUTE(Name) Name,
+#include "DXILOperation.inc"
+};
+
 } // namespace dxil
 } // namespace llvm
 
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index 5d5bb3eacace25..f1b72cea75c5e7 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -54,7 +54,7 @@ struct OpStage {
 
 struct OpAttribute {
   Version DXILVersion;
-  uint32_t ValidAttrs;
+  llvm::SmallVector<dxil::Attribute> ValidAttrs;
 };
 
 static const char *getOverloadTypeName(OverloadKind Kind) {
@@ -367,6 +367,20 @@ static std::optional<size_t> getPropIndex(ArrayRef<T> PropList,
   return std::nullopt;
 }
 
+static void setDXILAttribute(CallInst *CI, dxil::Attribute Attr) {
+  switch (Attr) {
+  case dxil::Attribute::ReadNone:
+    return CI->setDoesNotAccessMemory();
+  case dxil::Attribute::ReadOnly:
+    return CI->setOnlyReadsMemory();
+  case dxil::Attribute::NoReturn:
+    return CI->setDoesNotReturn();
+  case dxil::Attribute::NoDuplicate:
+    return CI->setCannotDuplicate();
+  }
+  llvm_unreachable("Invalid function attribute specified for DXIL operation");
+}
+
 namespace llvm {
 namespace dxil {
 
@@ -461,7 +475,17 @@ Expected<CallInst *> DXILOpBuilder::tryCreateOp(dxil::OpCode OpCode,
   OpArgs.push_back(IRB.getInt32(llvm::to_underlying(OpCode)));
   OpArgs.append(Args.begin(), Args.end());
 
-  return IRB.CreateCall(DXILFn, OpArgs, Name);
+  // Create the function call instruction
+  CallInst *CI = IRB.CreateCall(DXILFn, OpArgs, Name);
+
+  // We then need to attach available function attributes
+  for (auto OpAttr : Prop->Attributes)
+    if (VersionTuple(OpAttr.DXILVersion.Major, OpAttr.DXILVersion.Minor) <=
+        DXILVersion)
+      for (auto Attr : OpAttr.ValidAttrs)
+        setDXILAttribute(CI, Attr);
+
+  return CI;
 }
 
 CallInst *DXILOpBuilder::createOp(dxil::OpCode OpCode, ArrayRef<Value *> Args,
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index a0c93bed5ad834..44be25e481824f 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -315,41 +315,37 @@ static std::string getStageMaskString(ArrayRef<const Record *> Recs) {
 // by input records
 //
 /// \param Recs A vector of records of TableGen Attribute records
-/// \return std::string string representation of stages mask string
+/// \return std::string string representation of attributes list string
 ///         predicated by DXIL Version. E.g.,
-//          {{{1, 0}, Mask1}, {{1, 2}, Mask2}, ...}
-static std::string getAttributeMaskString(ArrayRef<const Record *> Recs) {
-  std::string MaskString = "";
+//          {{{1, 0}, {Attr1, ...}}, {{1, 2}, {Attr2, ...}}, ...}
+static std::string getAttributeListString(ArrayRef<const Record *> Recs) {
+  std::string ListString = "";
   std::string Prefix = "";
-  MaskString.append("{");
+  ListString.append("{");
 
   for (const auto *Rec : Recs) {
     unsigned Major = Rec->getValueAsDef("dxil_version")->getValueAsInt("Major");
     unsigned Minor = Rec->getValueAsDef("dxil_version")->getValueAsInt("Minor");
-    MaskString.append(Prefix)
+    ListString.append(Prefix)
         .append("{{")
         .append(std::to_string(Major))
         .append(", ")
-        .append(std::to_string(Minor).append("}, "));
-
-    std::string PipePrefix = "";
-    auto Attrs = Rec->getValueAsListOfDefs("op_attrs");
-    if (Attrs.empty()) {
-      MaskString.append("Attribute::None");
-    } else {
-      for (const auto *Attr : Attrs) {
-        MaskString.append(PipePrefix)
-            .append("Attribute::")
-            .append(Attr->getName());
-        PipePrefix = " | ";
-      }
+        .append(std::to_string(Minor).append("}, {"));
+
+    std::string CommaPrefix = "";
+    auto Attrs = Rec->getValueAsListOfDefs("fn_attrs");
+    for (const auto *Attr : Attrs) {
+      ListString.append(CommaPrefix)
+          .append("dxil::Attribute::")
+          .append(Attr->getName());
+      CommaPrefix = ", ";
     }
-
-    MaskString.append("}");
+    ListString.append("}"); // End of Attrs
+    ListString.append("}"); // End of Rec
     Prefix = ", ";
   }
-  MaskString.append("}");
-  return MaskString;
+  ListString.append("}"); // End of List
+  return ListString;
 }
 
 /// Emit a mapping of DXIL opcode to opname
@@ -381,6 +377,15 @@ static void emitDXILOpParamTypes(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "#endif\n\n";
 }
 
+/// Emit a list of DXIL op function attributes
+static void emitDXILAttributes(const RecordKeeper &Records, raw_ostream &OS) {
+  OS << "#ifdef DXIL_ATTRIBUTE\n";
+  for (const Record *Attr : Records.getAllDerivedDefinitions("DXILAttribute"))
+    OS << "DXIL_ATTRIBUTE(" << Attr->getName() << ")\n";
+  OS << "#undef DXIL_ATTRIBUTE\n";
+  OS << "#endif\n\n";
+}
+
 /// Emit a list of DXIL op function types
 static void emitDXILOpFunctionTypes(ArrayRef<DXILOperationDesc> Ops,
                                     raw_ostream &OS) {
@@ -477,7 +482,7 @@ static void emitDXILOperationTable(ArrayRef<DXILOperationDesc> Ops,
        << OpClassStrings.get(Op.OpClass.data()) << ", "
        << getOverloadMaskString(Op.OverloadRecs) << ", "
        << getStageMaskString(Op.StageRecs) << ", "
-       << getAttributeMaskString(Op.AttrRecs) << ", " << Op.OverloadParamIndex
+       << getAttributeListString(Op.AttrRecs) << ", " << Op.OverloadParamIndex
        << " }";
     Prefix = ",\n";
   }
@@ -582,6 +587,7 @@ static void emitDxilOperation(const RecordKeeper &Records, raw_ostream &OS) {
   emitDXILOpCodes(DXILOps, OS);
   emitDXILOpClasses(Records, OS);
   emitDXILOpParamTypes(Records, OS);
+  emitDXILAttributes(Records, OS);
   emitDXILOpFunctionTypes(DXILOps, OS);
   emitDXILIntrinsicArgSelectTypes(Records, OS);
   emitDXILIntrinsicMap(DXILOps, OS);

>From b27564e375e9f9afa5ed6f0e06dc8fd202888955 Mon Sep 17 00:00:00 2001
From: Finn Plummer <canadienfinn at gmail.com>
Date: Mon, 18 Nov 2024 22:02:58 +0000
Subject: [PATCH 02/12] correct ReadNone/ReadOnly function attributes

- correct all uses of ReadOnly/ReadNone to be consistent with hctdb.py
in DXC
- also fix order of attributes in each op
---
 llvm/lib/Target/DirectX/DXIL.td | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index f6a0af4f8fa30e..c08ff14a95afa0 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -783,6 +783,7 @@ def CreateHandle : DXILOp<57, createHandle> {
   let arguments = [Int8Ty, Int32Ty, Int32Ty, Int1Ty];
   let result = HandleTy;
   let stages = [Stages<DXIL1_0, [all_stages]>, Stages<DXIL1_6, [removed]>];
+  let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
 }
 
 def BufferLoad : DXILOp<68, bufferLoad> {
@@ -794,6 +795,7 @@ def BufferLoad : DXILOp<68, bufferLoad> {
       [Overloads<DXIL1_0,
                  [ResRetHalfTy, ResRetFloatTy, ResRetInt16Ty, ResRetInt32Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
 }
 
 def BufferStore : DXILOp<69, bufferStore> {
@@ -822,6 +824,7 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> {
   let result = Int1Ty;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
 }
 
 def Discard : DXILOp<82, discard> {
@@ -896,8 +899,8 @@ def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
   let intrinsics = [ IntrinSelect<int_dx_dot4add_i8packed> ];
   let arguments = [Int32Ty, Int32Ty, Int32Ty];
   let result = Int32Ty;
-  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
 def Dot4AddU8Packed : DXILOp<164, dot4AddPacked> {
@@ -906,8 +909,8 @@ def Dot4AddU8Packed : DXILOp<164, dot4AddPacked> {
   let intrinsics = [ IntrinSelect<int_dx_dot4add_u8packed> ];
   let arguments = [Int32Ty, Int32Ty, Int32Ty];
   let result = Int32Ty;
-  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
 def AnnotateHandle : DXILOp<216, annotateHandle> {
@@ -915,6 +918,7 @@ def AnnotateHandle : DXILOp<216, annotateHandle> {
   let arguments = [HandleTy, ResPropsTy];
   let result = HandleTy;
   let stages = [Stages<DXIL1_6, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
 def CreateHandleFromBinding : DXILOp<217, createHandleFromBinding> {
@@ -922,6 +926,7 @@ def CreateHandleFromBinding : DXILOp<217, createHandleFromBinding> {
   let arguments = [ResBindTy, Int32Ty, Int1Ty];
   let result = HandleTy;
   let stages = [Stages<DXIL1_6, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
 def WaveActiveAnyTrue : DXILOp<113, waveAnyTrue> {
@@ -938,7 +943,6 @@ def WaveIsFirstLane :  DXILOp<110, waveIsFirstLane> {
   let arguments = [];
   let result = Int1Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
-  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
 def WaveReadLaneAt:  DXILOp<117, waveReadLaneAt> {
@@ -948,7 +952,6 @@ def WaveReadLaneAt:  DXILOp<117, waveReadLaneAt> {
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy, Int1Ty, Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
-  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
 def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
@@ -957,7 +960,7 @@ def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
   let arguments = [];
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
-  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+  let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
 }
 
 def WaveAllBitCount : DXILOp<135, waveAllOp> {
@@ -966,7 +969,6 @@ def WaveAllBitCount : DXILOp<135, waveAllOp> {
   let arguments = [Int1Ty];
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
-  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
 def Barrier : DXILOp<80, barrier> {

>From fde0d22a78f167df7103bb753dfcedd462f684b1 Mon Sep 17 00:00:00 2001
From: Finn Plummer <canadienfinn at gmail.com>
Date: Mon, 18 Nov 2024 22:03:53 +0000
Subject: [PATCH 03/12] update testcases to check attributes

- testcases are updated to check updated attribute types
- there are some CHECK-NOT tests to ensure that previously set
attributes no longer emit an attribute
---
 llvm/test/CodeGen/DirectX/BufferLoad.ll       | 24 ++++++++-------
 llvm/test/CodeGen/DirectX/BufferStore.ll      |  8 ++---
 llvm/test/CodeGen/DirectX/CreateHandle.ll     | 14 +++++----
 .../DirectX/CreateHandleFromBinding.ll        | 26 ++++++++--------
 .../CodeGen/DirectX/WaveActiveCountBits.ll    |  2 +-
 llvm/test/CodeGen/DirectX/WaveGetLaneIndex.ll |  4 ++-
 .../CodeGen/DirectX/WaveReadLaneAt-vec.ll     | 18 +++++------
 llvm/test/CodeGen/DirectX/WaveReadLaneAt.ll   | 16 +++++-----
 llvm/test/CodeGen/DirectX/abs.ll              |  8 +++--
 llvm/test/CodeGen/DirectX/acos.ll             | 14 +++++----
 llvm/test/CodeGen/DirectX/asin.ll             | 14 +++++----
 llvm/test/CodeGen/DirectX/atan.ll             | 14 +++++----
 .../CodeGen/DirectX/bufferUpdateCounter.ll    |  6 ++--
 llvm/test/CodeGen/DirectX/ceil.ll             | 14 +++++----
 llvm/test/CodeGen/DirectX/comput_ids.ll       | 10 ++++---
 llvm/test/CodeGen/DirectX/cos.ll              | 14 +++++----
 llvm/test/CodeGen/DirectX/cosh.ll             | 14 +++++----
 llvm/test/CodeGen/DirectX/countbits.ll        | 22 +++++++-------
 llvm/test/CodeGen/DirectX/dot4add_i8packed.ll |  4 ++-
 llvm/test/CodeGen/DirectX/dot4add_u8packed.ll |  4 ++-
 llvm/test/CodeGen/DirectX/exp.ll              |  6 ++--
 llvm/test/CodeGen/DirectX/fdot.ll             | 14 +++++----
 llvm/test/CodeGen/DirectX/firstbithigh.ll     | 30 ++++++++++---------
 llvm/test/CodeGen/DirectX/floor.ll            | 14 +++++----
 llvm/test/CodeGen/DirectX/fmad.ll             |  7 +++--
 llvm/test/CodeGen/DirectX/fmax.ll             |  8 +++--
 llvm/test/CodeGen/DirectX/fmin.ll             |  8 +++--
 llvm/test/CodeGen/DirectX/frac.ll             | 14 +++++----
 llvm/test/CodeGen/DirectX/idot.ll             | 22 +++++++-------
 llvm/test/CodeGen/DirectX/imad.ll             |  8 +++--
 llvm/test/CodeGen/DirectX/isinf.ll            |  5 ++--
 llvm/test/CodeGen/DirectX/log.ll              |  6 ++--
 llvm/test/CodeGen/DirectX/log10.ll            |  6 ++--
 llvm/test/CodeGen/DirectX/log2.ll             |  6 ++--
 llvm/test/CodeGen/DirectX/reversebits.ll      | 16 +++++-----
 llvm/test/CodeGen/DirectX/round.ll            | 13 ++++----
 llvm/test/CodeGen/DirectX/rsqrt.ll            | 13 ++++----
 llvm/test/CodeGen/DirectX/saturate.ll         |  8 +++--
 llvm/test/CodeGen/DirectX/sin.ll              | 14 +++++----
 llvm/test/CodeGen/DirectX/sinh.ll             | 14 +++++----
 llvm/test/CodeGen/DirectX/smax.ll             |  8 +++--
 llvm/test/CodeGen/DirectX/smin.ll             |  8 +++--
 llvm/test/CodeGen/DirectX/splitdouble.ll      | 14 +++++----
 llvm/test/CodeGen/DirectX/sqrt.ll             | 14 +++++----
 llvm/test/CodeGen/DirectX/tan.ll              | 14 +++++----
 llvm/test/CodeGen/DirectX/tanh.ll             | 14 +++++----
 llvm/test/CodeGen/DirectX/trunc.ll            | 14 +++++----
 llvm/test/CodeGen/DirectX/umad.ll             |  8 +++--
 llvm/test/CodeGen/DirectX/umax.ll             |  8 +++--
 llvm/test/CodeGen/DirectX/umin.ll             |  8 +++--
 .../CodeGen/DirectX/wave_is_first_lane.ll     |  2 ++
 51 files changed, 342 insertions(+), 252 deletions(-)

diff --git a/llvm/test/CodeGen/DirectX/BufferLoad.ll b/llvm/test/CodeGen/DirectX/BufferLoad.ll
index 24d65fe1648c15..874c81df29b64a 100644
--- a/llvm/test/CodeGen/DirectX/BufferLoad.ll
+++ b/llvm/test/CodeGen/DirectX/BufferLoad.ll
@@ -16,7 +16,7 @@ define void @loadv4f32() {
   ; The temporary casts should all have been cleaned up
   ; CHECK-NOT: %dx.cast_handle
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR:]]
   %data0 = call <4 x float> @llvm.dx.typedBufferLoad(
       target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0)
 
@@ -33,7 +33,7 @@ define void @loadv4f32() {
   call void @scalar_user(float %data0_0)
   call void @scalar_user(float %data0_2)
 
-  ; CHECK: [[DATA4:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 4, i32 undef)
+  ; CHECK: [[DATA4:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 4, i32 undef) #[[#ATTR]]
   %data4 = call <4 x float> @llvm.dx.typedBufferLoad(
       target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 4)
 
@@ -47,7 +47,7 @@ define void @loadv4f32() {
   ; CHECK: insertelement <4 x float>
   call void @vector_user(<4 x float> %data4)
 
-  ; CHECK: [[DATA12:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 12, i32 undef)
+  ; CHECK: [[DATA12:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 12, i32 undef) #[[#ATTR]]
   %data12 = call <4 x float> @llvm.dx.typedBufferLoad(
       target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 12)
 
@@ -69,7 +69,7 @@ define void @index_dynamic(i32 %bufindex, i32 %elemindex) {
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[LOAD:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 %bufindex, i32 undef)
+  ; CHECK: [[LOAD:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 %bufindex, i32 undef) #[[#ATTR]]
   %load = call <4 x float> @llvm.dx.typedBufferLoad(
       target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %bufindex)
 
@@ -104,7 +104,7 @@ define void @loadf32() {
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_f32_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR]]
   %data0 = call float @llvm.dx.typedBufferLoad(
       target("dx.TypedBuffer", float, 0, 0, 0) %buffer, i32 0)
 
@@ -122,7 +122,7 @@ define void @loadv2f32() {
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v2f32_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR]]
   %data0 = call <2 x float> @llvm.dx.typedBufferLoad(
       target("dx.TypedBuffer", <2 x float>, 0, 0, 0) %buffer, i32 0)
 
@@ -136,12 +136,12 @@ define void @loadv4f32_checkbit() {
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR]]
   %data0 = call {<4 x float>, i1} @llvm.dx.typedBufferLoad.checkbit.f32(
       target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0)
 
   ; CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA0]], 4
-  ; CHECK: [[MAPPED:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]
+  ; CHECK: [[MAPPED:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) #[[#ATTR]]
   %check = extractvalue {<4 x float>, i1} %data0, 1
 
   ; CHECK: call void @check_user(i1 [[MAPPED]])
@@ -157,7 +157,7 @@ define void @loadv4i32() {
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i32_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR]]
   %data0 = call <4 x i32> @llvm.dx.typedBufferLoad(
       target("dx.TypedBuffer", <4 x i32>, 0, 0, 0) %buffer, i32 0)
 
@@ -171,7 +171,7 @@ define void @loadv4f16() {
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f16_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f16 @dx.op.bufferLoad.f16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f16 @dx.op.bufferLoad.f16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR]]
   %data0 = call <4 x half> @llvm.dx.typedBufferLoad(
       target("dx.TypedBuffer", <4 x half>, 0, 0, 0) %buffer, i32 0)
 
@@ -185,9 +185,11 @@ define void @loadv4i16() {
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i16_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
-  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i16 @dx.op.bufferLoad.i16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
+  ; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.i16 @dx.op.bufferLoad.i16(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef) #[[#ATTR]]
   %data0 = call <4 x i16> @llvm.dx.typedBufferLoad(
       target("dx.TypedBuffer", <4 x i16>, 0, 0, 0) %buffer, i32 0)
 
   ret void
 }
+
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(read) {{.*}}}
diff --git a/llvm/test/CodeGen/DirectX/BufferStore.ll b/llvm/test/CodeGen/DirectX/BufferStore.ll
index 81cc5fd328e0a7..b88c85ac19bc6f 100644
--- a/llvm/test/CodeGen/DirectX/BufferStore.ll
+++ b/llvm/test/CodeGen/DirectX/BufferStore.ll
@@ -17,7 +17,7 @@ define void @storefloat(<4 x float> %data, i32 %index) {
   ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x float> %data, i32 1
   ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x float> %data, i32 2
   ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x float> %data, i32 3
-  ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, float [[DATA0_0]], float [[DATA0_1]], float [[DATA0_2]], float [[DATA0_3]], i8 15)
+  ; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, float [[DATA0_0]], float [[DATA0_1]], float [[DATA0_2]], float [[DATA0_3]], i8 15){{$}}
   call void @llvm.dx.typedBufferStore(
       target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer,
       i32 %index, <4 x float> %data)
@@ -37,7 +37,7 @@ define void @storeint(<4 x i32> %data, i32 %index) {
   ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i32> %data, i32 1
   ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i32> %data, i32 2
   ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i32> %data, i32 3
-  ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i32 [[DATA0_0]], i32 [[DATA0_1]], i32 [[DATA0_2]], i32 [[DATA0_3]], i8 15)
+  ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i32 [[DATA0_0]], i32 [[DATA0_1]], i32 [[DATA0_2]], i32 [[DATA0_3]], i8 15){{$}}
   call void @llvm.dx.typedBufferStore(
       target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %buffer,
       i32 %index, <4 x i32> %data)
@@ -60,7 +60,7 @@ define void @storehalf(<4 x half> %data, i32 %index) {
   ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x half> %data, i32 1
   ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x half> %data, i32 2
   ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x half> %data, i32 3
-  ; CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, half [[DATA0_0]], half [[DATA0_1]], half [[DATA0_2]], half [[DATA0_3]], i8 15)
+  ; CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, half [[DATA0_0]], half [[DATA0_1]], half [[DATA0_2]], half [[DATA0_3]], i8 15){{$}}
   call void @llvm.dx.typedBufferStore(
       target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer,
       i32 %index, <4 x half> %data)
@@ -83,7 +83,7 @@ define void @storei16(<4 x i16> %data, i32 %index) {
   ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i16> %data, i32 1
   ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i16> %data, i32 2
   ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i16> %data, i32 3
-  ; CHECK: call void @dx.op.bufferStore.i16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i16 [[DATA0_0]], i16 [[DATA0_1]], i16 [[DATA0_2]], i16 [[DATA0_3]], i8 15)
+  ; CHECK: call void @dx.op.bufferStore.i16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i16 [[DATA0_0]], i16 [[DATA0_1]], i16 [[DATA0_2]], i16 [[DATA0_3]], i8 15){{$}}
   call void @llvm.dx.typedBufferStore(
       target("dx.TypedBuffer", <4 x i16>, 1, 0, 0) %buffer,
       i32 %index, <4 x i16> %data)
diff --git a/llvm/test/CodeGen/DirectX/CreateHandle.ll b/llvm/test/CodeGen/DirectX/CreateHandle.ll
index 234d4e035bf1d5..695433d9cdfa16 100644
--- a/llvm/test/CodeGen/DirectX/CreateHandle.ll
+++ b/llvm/test/CodeGen/DirectX/CreateHandle.ll
@@ -19,14 +19,14 @@ define void @test_buffers() {
   %typed0 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
               @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0(
                   i32 3, i32 5, i32 1, i32 0, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 5, i1 false)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 5, i1 false) #[[#ATTR:]]
   ; CHECK-NOT: @llvm.dx.cast.handle
 
   ; RWBuffer<int> Buf : register(u7, space2)
   %typed1 = call target("dx.TypedBuffer", i32, 1, 0, 1)
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_1_0_1t(
           i32 2, i32 7, i32 1, i32 0, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 7, i1 false)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 7, i1 false) #[[#ATTR]]
 
   ; Buffer<uint4> Buf[24] : register(t3, space5)
   ; Buffer<uint4> typed2 = Buf[4]
@@ -34,20 +34,20 @@ define void @test_buffers() {
   %typed2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0)
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_0_0_0t(
           i32 5, i32 3, i32 24, i32 4, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 3, i32 7, i1 false)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 3, i32 7, i1 false) #[[#ATTR]]
 
   ; struct S { float4 a; uint4 b; };
   ; StructuredBuffer<S> Buf : register(t2, space4)
   %struct0 = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0)
       @llvm.dx.handle.fromBinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0t(
           i32 4, i32 2, i32 1, i32 0, i1 true)
-  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 2, i32 2, i1 true)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 2, i32 2, i1 true) #[[#ATTR]]
 
   ; ByteAddressBuffer Buf : register(t8, space1)
   %byteaddr0 = call target("dx.RawBuffer", i8, 0, 0)
       @llvm.dx.handle.fromBinding.tdx.RawBuffer_i8_0_0t(
           i32 1, i32 8, i32 1, i32 0, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 1, i32 8, i1 false)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 1, i32 8, i1 false) #[[#ATTR]]
 
   ; Buffer<float4> Buf[] : register(t7)
   ; Buffer<float4> typed3 = Buf[ix]
@@ -56,11 +56,13 @@ define void @test_buffers() {
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0t(
           i32 0, i32 7, i32 -1, i32 %typed3_ix, i1 false)
   ; CHECK: %[[IX:.*]] = add i32 %typed3_ix, 7
-  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 %[[IX]], i1 false)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 %[[IX]], i1 false) #[[#ATTR]]
 
   ret void
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(read) {{.*}}}
+
 ; Just check that we have the right types and number of metadata nodes, the
 ; contents of the metadata are tested elsewhere.
 ;
diff --git a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
index aa143dfa8211d0..66dc47e17ee0f4 100644
--- a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
+++ b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
@@ -19,15 +19,15 @@ define void @test_bindings() {
   %typed0 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
               @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0(
                   i32 3, i32 5, i32 1, i32 0, i1 false)
-  ; CHECK: [[BUF0:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 3, i8 1 }, i32 5, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF0]], %dx.types.ResourceProperties { i32 4106, i32 1033 })
+  ; CHECK: [[BUF0:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 5, i32 5, i32 3, i8 1 }, i32 5, i1 false) #[[#ATTR:]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF0]], %dx.types.ResourceProperties { i32 4106, i32 1033 }) #[[#ATTR]]
 
   ; RWBuffer<int> Buf : register(u7, space2)
   %typed1 = call target("dx.TypedBuffer", i32, 1, 0, 1)
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_1_0_0t(
           i32 2, i32 7, i32 1, i32 0, i1 false)
-  ; CHECK: [[BUF1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 2, i8 1 }, i32 7, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF1]], %dx.types.ResourceProperties { i32 4106, i32 260 })
+  ; CHECK: [[BUF1:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 7, i32 2, i8 1 }, i32 7, i1 false) #[[#ATTR]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF1]], %dx.types.ResourceProperties { i32 4106, i32 260 }) #[[#ATTR]]
 
   ; Buffer<uint4> Buf[24] : register(t3, space5)
   ; Buffer<uint4> typed2 = Buf[4]
@@ -35,23 +35,23 @@ define void @test_bindings() {
   %typed2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0)
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_0_0_0t(
           i32 5, i32 3, i32 24, i32 4, i1 false)
-  ; CHECK: [[BUF2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 26, i32 5, i8 0 }, i32 7, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF2]], %dx.types.ResourceProperties { i32 10, i32 1029 })
+  ; CHECK: [[BUF2:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 3, i32 26, i32 5, i8 0 }, i32 7, i1 false) #[[#ATTR]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF2]], %dx.types.ResourceProperties { i32 10, i32 1029 }) #[[#ATTR]]
 
   ; struct S { float4 a; uint4 b; };
   ; StructuredBuffer<S> Buf : register(t2, space4)
   %struct0 = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0)
       @llvm.dx.handle.fromBinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0t(
           i32 4, i32 2, i32 1, i32 0, i1 true)
-  ; CHECK: [[BUF3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 4, i8 0 }, i32 2, i1 true)
-  ; CHECK: = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF3]], %dx.types.ResourceProperties { i32 1036, i32 32 })
+  ; CHECK: [[BUF3:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 2, i32 2, i32 4, i8 0 }, i32 2, i1 true) #[[#ATTR]]
+  ; CHECK: = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF3]], %dx.types.ResourceProperties { i32 1036, i32 32 }) #[[#ATTR]]
 
   ; ByteAddressBuffer Buf : register(t8, space1)
   %byteaddr0 = call target("dx.RawBuffer", i8, 0, 0)
       @llvm.dx.handle.fromBinding.tdx.RawBuffer_i8_0_0t(
           i32 1, i32 8, i32 1, i32 0, i1 false)
-  ; CHECK: [[BUF4:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 1, i8 0 }, i32 8, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF4]], %dx.types.ResourceProperties { i32 11, i32 0 })
+  ; CHECK: [[BUF4:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 8, i32 8, i32 1, i8 0 }, i32 8, i1 false) #[[#ATTR]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF4]], %dx.types.ResourceProperties { i32 11, i32 0 }) #[[#ATTR]]
 
   ; Buffer<float4> Buf[] : register(t7)
   ; Buffer<float4> typed3 = Buf[ix]
@@ -60,12 +60,14 @@ define void @test_bindings() {
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0t(
           i32 0, i32 7, i32 -1, i32 %typed3_ix, i1 false)
   ; CHECK: %[[IX:.*]] = add i32 %typed3_ix, 7
-  ; CHECK: [[BUF5:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 -1, i32 0, i8 0 }, i32 %[[IX]], i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF5]], %dx.types.ResourceProperties { i32 10, i32 1033 })
+  ; CHECK: [[BUF5:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 7, i32 -1, i32 0, i8 0 }, i32 %[[IX]], i1 false) #[[#ATTR]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BUF5]], %dx.types.ResourceProperties { i32 10, i32 1033 }) #[[#ATTR]]
 
   ret void
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 ; Just check that we have the right types and number of metadata nodes, the
 ; contents of the metadata are tested elsewhere.
 ;
diff --git a/llvm/test/CodeGen/DirectX/WaveActiveCountBits.ll b/llvm/test/CodeGen/DirectX/WaveActiveCountBits.ll
index 5d321372433198..35ca5f2435b1c6 100644
--- a/llvm/test/CodeGen/DirectX/WaveActiveCountBits.ll
+++ b/llvm/test/CodeGen/DirectX/WaveActiveCountBits.ll
@@ -2,7 +2,7 @@
 
 define void @main(i1 %expr) {
 entry:
-; CHECK: call i32 @dx.op.waveAllOp(i32 135, i1 %expr)
+; CHECK: call i32 @dx.op.waveAllOp(i32 135, i1 %expr){{$}}
   %0 = call i32 @llvm.dx.wave.active.countbits(i1 %expr)
   ret void
 }
diff --git a/llvm/test/CodeGen/DirectX/WaveGetLaneIndex.ll b/llvm/test/CodeGen/DirectX/WaveGetLaneIndex.ll
index 86b7ea4f962f77..df9fa6fefa1329 100644
--- a/llvm/test/CodeGen/DirectX/WaveGetLaneIndex.ll
+++ b/llvm/test/CodeGen/DirectX/WaveGetLaneIndex.ll
@@ -2,9 +2,11 @@
 
 define void @main() {
 entry:
-; CHECK: call i32 @dx.op.waveGetLaneIndex(i32 111)
+; CHECK: call i32 @dx.op.waveGetLaneIndex(i32 111) #[[#ATTR:]]
   %0 = call i32 @llvm.dx.wave.getlaneindex()
   ret void
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(read) {{.*}}}
+
 declare i32 @llvm.dx.wave.getlaneindex()
diff --git a/llvm/test/CodeGen/DirectX/WaveReadLaneAt-vec.ll b/llvm/test/CodeGen/DirectX/WaveReadLaneAt-vec.ll
index 8c2a11a3557af3..571f31c3c9c64b 100644
--- a/llvm/test/CodeGen/DirectX/WaveReadLaneAt-vec.ll
+++ b/llvm/test/CodeGen/DirectX/WaveReadLaneAt-vec.ll
@@ -5,27 +5,27 @@
 
 define noundef <2 x half> @wave_read_lane_v2half(<2 x half> noundef %expr, i32 %idx) {
 entry:
-; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr.i0, i32 %idx)
-; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr.i1, i32 %idx)
+; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr.i0, i32 %idx){{$}}
+; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr.i1, i32 %idx){{$}}
   %ret = call <2 x half> @llvm.dx.wave.readlane.f16(<2 x half> %expr, i32 %idx)
   ret <2 x half> %ret
 }
 
 define noundef <3 x i32> @wave_read_lane_v3i32(<3 x i32> noundef %expr, i32 %idx) {
 entry:
-; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i0, i32 %idx)
-; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i1, i32 %idx)
-; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i2, i32 %idx)
+; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i0, i32 %idx){{$}}
+; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i1, i32 %idx){{$}}
+; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i2, i32 %idx){{$}}
   %ret = call <3 x i32> @llvm.dx.wave.readlane(<3 x i32> %expr, i32 %idx)
   ret <3 x i32> %ret
 }
 
 define noundef <4 x double> @wave_read_lane_v4f64(<4 x double> noundef %expr, i32 %idx) {
 entry:
-; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i0, i32 %idx)
-; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i1, i32 %idx)
-; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i2, i32 %idx)
-; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i3, i32 %idx)
+; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i0, i32 %idx){{$}}
+; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i1, i32 %idx){{$}}
+; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i2, i32 %idx){{$}}
+; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i3, i32 %idx){{$}}
   %ret = call <4 x double> @llvm.dx.wave.readlane(<4 x double> %expr, i32 %idx)
   ret <4 x double> %ret
 }
diff --git a/llvm/test/CodeGen/DirectX/WaveReadLaneAt.ll b/llvm/test/CodeGen/DirectX/WaveReadLaneAt.ll
index 0024ba66c0cad8..548117d431ff21 100644
--- a/llvm/test/CodeGen/DirectX/WaveReadLaneAt.ll
+++ b/llvm/test/CodeGen/DirectX/WaveReadLaneAt.ll
@@ -4,53 +4,55 @@
 
 define noundef half @wave_rla_half(half noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr, i32 %idx)
+; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr, i32 %idx){{$}}
   %ret = call half @llvm.dx.wave.readlane.f16(half %expr, i32 %idx)
   ret half %ret
 }
 
 define noundef float @wave_rla_float(float noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call float @dx.op.waveReadLaneAt.f32(i32 117, float %expr, i32 %idx)
+; CHECK: call float @dx.op.waveReadLaneAt.f32(i32 117, float %expr, i32 %idx){{$}}
   %ret = call float @llvm.dx.wave.readlane(float %expr, i32 %idx)
   ret float %ret
 }
 
 define noundef double @wave_rla_double(double noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr, i32 %idx)
+; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr, i32 %idx){{$}}
   %ret = call double @llvm.dx.wave.readlane(double %expr, i32 %idx)
   ret double %ret
 }
 
 define noundef i1 @wave_rla_i1(i1 noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call i1 @dx.op.waveReadLaneAt.i1(i32 117, i1 %expr, i32 %idx)
+; CHECK: call i1 @dx.op.waveReadLaneAt.i1(i32 117, i1 %expr, i32 %idx){{$}}
   %ret = call i1 @llvm.dx.wave.readlane.i1(i1 %expr, i32 %idx)
   ret i1 %ret
 }
 
 define noundef i16 @wave_rla_i16(i16 noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call i16 @dx.op.waveReadLaneAt.i16(i32 117, i16 %expr, i32 %idx)
+; CHECK: call i16 @dx.op.waveReadLaneAt.i16(i32 117, i16 %expr, i32 %idx){{$}}
   %ret = call i16 @llvm.dx.wave.readlane.i16(i16 %expr, i32 %idx)
   ret i16 %ret
 }
 
 define noundef i32 @wave_rla_i32(i32 noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr, i32 %idx)
+; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr, i32 %idx){{$}}
   %ret = call i32 @llvm.dx.wave.readlane.i32(i32 %expr, i32 %idx)
   ret i32 %ret
 }
 
 define noundef i64 @wave_rla_i64(i64 noundef %expr, i32 noundef %idx) {
 entry:
-; CHECK: call i64 @dx.op.waveReadLaneAt.i64(i32 117, i64 %expr, i32 %idx)
+; CHECK: call i64 @dx.op.waveReadLaneAt.i64(i32 117, i64 %expr, i32 %idx){{$}}
   %ret = call i64 @llvm.dx.wave.readlane.i64(i64 %expr, i32 %idx)
   ret i64 %ret
 }
 
+; CHECK-NOT: attributes {{.*}} memory(none)
+
 declare half @llvm.dx.wave.readlane.f16(half, i32)
 declare float @llvm.dx.wave.readlane.f32(float, i32)
 declare double @llvm.dx.wave.readlane.f64(double, i32)
diff --git a/llvm/test/CodeGen/DirectX/abs.ll b/llvm/test/CodeGen/DirectX/abs.ll
index 34464e9db14cbf..500facc959de53 100644
--- a/llvm/test/CodeGen/DirectX/abs.ll
+++ b/llvm/test/CodeGen/DirectX/abs.ll
@@ -8,7 +8,7 @@ define noundef i16 @abs_i16(i16 noundef %a) {
 entry:
 ; CHECK: sub i16 0, %a
 ; EXPCHECK: call i16 @llvm.smax.i16(i16 %a, i16 %{{.*}})
-; DOPCHECK: call i16 @dx.op.binary.i16(i32 37, i16 %a, i16 %{{.*}})
+; DOPCHECK: call i16 @dx.op.binary.i16(i32 37, i16 %a, i16 %{{.*}}) #[[#ATTR:]]
   %elt.abs = call i16 @llvm.abs.i16(i16 %a, i1 false)
   ret i16 %elt.abs
 }
@@ -18,7 +18,7 @@ define noundef i32 @abs_i32(i32 noundef %a) {
 entry:
 ; CHECK: sub i32 0, %a
 ; EXPCHECK: call i32 @llvm.smax.i32(i32 %a, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.binary.i32(i32 37, i32 %a, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.binary.i32(i32 37, i32 %a, i32 %{{.*}}) #[[#ATTR]]
   %elt.abs = call i32 @llvm.abs.i32(i32 %a, i1 false)
   ret i32 %elt.abs
 }
@@ -28,11 +28,13 @@ define noundef i64 @abs_i64(i64 noundef %a) {
 entry:
 ; CHECK: sub i64 0, %a
 ; EXPCHECK: call i64 @llvm.smax.i64(i64 %a, i64 %{{.*}})
-; DOPCHECK: call i64 @dx.op.binary.i64(i32 37, i64 %a, i64 %{{.*}})
+; DOPCHECK: call i64 @dx.op.binary.i64(i32 37, i64 %a, i64 %{{.*}}) #[[#ATTR]]
   %elt.abs = call i64 @llvm.abs.i64(i64 %a, i1 false)
   ret i64 %elt.abs
 }
 
+; DOPCHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.abs.i16(i16, i1 immarg)
 declare i32 @llvm.abs.i32(i32, i1 immarg)
 declare i64 @llvm.abs.i64(i64, i1 immarg)
diff --git a/llvm/test/CodeGen/DirectX/acos.ll b/llvm/test/CodeGen/DirectX/acos.ll
index f4a10eb368ebfb..fe8e44610ee657 100644
--- a/llvm/test/CodeGen/DirectX/acos.ll
+++ b/llvm/test/CodeGen/DirectX/acos.ll
@@ -4,14 +4,14 @@
 
 define noundef float @acos_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 15, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 15, float %{{.*}}) #[[#ATTR:]]
   %elt.acos = call float @llvm.acos.f32(float %a)
   ret float %elt.acos
 }
 
 define noundef half @acos_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 15, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 15, half %{{.*}}) #[[#ATTR]]
   %elt.acos = call half @llvm.acos.f16(half %a)
   ret half %elt.acos
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @acos_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.acos.f16(half)
 declare float @llvm.acos.f32(float)
 declare <4 x float> @llvm.acos.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/asin.ll b/llvm/test/CodeGen/DirectX/asin.ll
index bd948f593c24e2..a6ce185d5e3a92 100644
--- a/llvm/test/CodeGen/DirectX/asin.ll
+++ b/llvm/test/CodeGen/DirectX/asin.ll
@@ -4,14 +4,14 @@
 
 define noundef float @asin_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 16, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 16, float %{{.*}}) #[[#ATTR:]]
   %elt.asin = call float @llvm.asin.f32(float %a)
   ret float %elt.asin
 }
 
 define noundef half @asin_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 16, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 16, half %{{.*}}) #[[#ATTR]]
   %elt.asin = call half @llvm.asin.f16(half %a)
   ret half %elt.asin
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @asin_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.asin.f16(half)
 declare float @llvm.asin.f32(float)
 declare <4 x float> @llvm.asin.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/atan.ll b/llvm/test/CodeGen/DirectX/atan.ll
index 58899ab49bdb8e..228ccce03eeccc 100644
--- a/llvm/test/CodeGen/DirectX/atan.ll
+++ b/llvm/test/CodeGen/DirectX/atan.ll
@@ -4,14 +4,14 @@
 
 define noundef float @atan_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 17, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 17, float %{{.*}}) #[[#ATTR:]]
   %elt.atan = call float @llvm.atan.f32(float %a)
   ret float %elt.atan
 }
 
 define noundef half @atan_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 17, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 17, half %{{.*}}) #[[#ATTR]]
   %elt.atan = call half @llvm.atan.f16(half %a)
   ret half %elt.atan
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @atan_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.atan.f16(half)
 declare float @llvm.atan.f32(float)
 declare <4 x float> @llvm.atan.v4f32(<4 x float>) 
diff --git a/llvm/test/CodeGen/DirectX/bufferUpdateCounter.ll b/llvm/test/CodeGen/DirectX/bufferUpdateCounter.ll
index 3f2610649cba17..8e63082ca19843 100644
--- a/llvm/test/CodeGen/DirectX/bufferUpdateCounter.ll
+++ b/llvm/test/CodeGen/DirectX/bufferUpdateCounter.ll
@@ -11,7 +11,7 @@ define void @update_counter_decrement_vector() {
           i32 0, i32 0, i32 1, i32 0, i1 false)
 
  ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
- ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1)
+ ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1){{$}}
   %1 = call i32 @llvm.dx.bufferUpdateCounter(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i8 -1)
   ret void
 }
@@ -23,7 +23,7 @@ define void @update_counter_increment_vector() {
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0(
           i32 0, i32 0, i32 1, i32 0, i1 false)
   ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
-  ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 1)
+  ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 1){{$}}
   %1 = call i32 @llvm.dx.bufferUpdateCounter(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i8 1)
   ret void
 }
@@ -35,7 +35,7 @@ define void @update_counter_decrement_scalar() {
       @llvm.dx.handle.fromBinding.tdx.RawBuffer_i8_0_0t(
           i32 1, i32 8, i32 1, i32 0, i1 false)
   ; CHECK-NEXT: [[BUFFANOT:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[BIND]]
-  ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1)
+  ; CHECK-NEXT: [[REG:%.*]] = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle [[BUFFANOT]], i8 -1){{$}}
   %1 = call i32 @llvm.dx.bufferUpdateCounter(target("dx.RawBuffer", i8, 0, 0) %buffer, i8 -1)
   ret void
 }
diff --git a/llvm/test/CodeGen/DirectX/ceil.ll b/llvm/test/CodeGen/DirectX/ceil.ll
index bd6e747c2fbf5f..73ea2476b028fe 100644
--- a/llvm/test/CodeGen/DirectX/ceil.ll
+++ b/llvm/test/CodeGen/DirectX/ceil.ll
@@ -4,14 +4,14 @@
 
 define noundef float @ceil_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 28, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 28, float %{{.*}}) #[[#ATTR:]]
   %elt.ceil = call float @llvm.ceil.f32(float %a)
   ret float %elt.ceil
 }
 
 define noundef half @ceil_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 28, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 28, half %{{.*}}) #[[#ATTR]]
   %elt.ceil = call half @llvm.ceil.f16(half %a)
   ret half %elt.ceil
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @ceil_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.ceil.f16(half)
 declare float @llvm.ceil.f32(float)
 declare <4 x float> @llvm.ceil.v4f32(<4 x float>) 
diff --git a/llvm/test/CodeGen/DirectX/comput_ids.ll b/llvm/test/CodeGen/DirectX/comput_ids.ll
index 976b3ea5c6ecdf..b1b6cf813b598b 100644
--- a/llvm/test/CodeGen/DirectX/comput_ids.ll
+++ b/llvm/test/CodeGen/DirectX/comput_ids.ll
@@ -9,7 +9,7 @@ target triple = "dxil-pc-shadermodel6.7-compute"
 ; Function Attrs: noinline nounwind optnone
 define i32 @test_thread_id(i32 %a) #0 {
 entry:
-; CHECK:call i32 @dx.op.threadId.i32(i32 93, i32 %{{.*}})
+; CHECK:call i32 @dx.op.threadId.i32(i32 93, i32 %{{.*}}) #[[#ATTR:]]
   %0 = call i32 @llvm.dx.thread.id(i32 %a)
   ret i32 %0
 }
@@ -18,7 +18,7 @@ entry:
 ; Function Attrs: noinline nounwind optnone
 define i32 @test_group_id(i32 %a) #0 {
 entry:
-; CHECK: call i32 @dx.op.groupId.i32(i32 94, i32 %{{.*}})
+; CHECK: call i32 @dx.op.groupId.i32(i32 94, i32 %{{.*}}) #[[#ATTR]]
   %0 = call i32 @llvm.dx.group.id(i32 %a)
   ret i32 %0
 }
@@ -27,7 +27,7 @@ entry:
 ; Function Attrs: noinline nounwind optnone
 define i32 @test_thread_id_in_group(i32 %a) #0 {
 entry:
-; CHECK: call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 %{{.*}})
+; CHECK: call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 %{{.*}}) #[[#ATTR]]
   %0 = call i32 @llvm.dx.thread.id.in.group(i32 %a)
   ret i32 %0
 }
@@ -36,11 +36,13 @@ entry:
 ; Function Attrs: noinline nounwind optnone
 define i32 @test_flattened_thread_id_in_group() #0 {
 entry:
-; CHECK: call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
+; CHECK: call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96) #[[#ATTR]]
   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
   ret i32 %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 ; Function Attrs: nounwind readnone willreturn
 declare i32 @llvm.dx.thread.id(i32) #1
 declare i32 @llvm.dx.group.id(i32) #1
diff --git a/llvm/test/CodeGen/DirectX/cos.ll b/llvm/test/CodeGen/DirectX/cos.ll
index 85f5db25570b90..e86fd8c837c3de 100644
--- a/llvm/test/CodeGen/DirectX/cos.ll
+++ b/llvm/test/CodeGen/DirectX/cos.ll
@@ -4,14 +4,14 @@
 
 define noundef float @cos_float(float noundef %a) #0 {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 12, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 12, float %{{.*}}) #[[#ATTR:]]
   %elt.cos = call float @llvm.cos.f32(float %a)
   ret float %elt.cos
 }
 
 define noundef half @cos_half(half noundef %a) #0 {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 12, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 12, half %{{.*}}) #[[#ATTR]]
   %elt.cos = call half @llvm.cos.f16(half %a)
   ret half %elt.cos
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @cos_float4(<4 x float> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.cos.f16(half)
 declare float @llvm.cos.f32(float)
 declare <4 x float> @llvm.cos.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/cosh.ll b/llvm/test/CodeGen/DirectX/cosh.ll
index 670a8a3eae0864..b7ae6b63d72be6 100644
--- a/llvm/test/CodeGen/DirectX/cosh.ll
+++ b/llvm/test/CodeGen/DirectX/cosh.ll
@@ -4,14 +4,14 @@
 
 define noundef float @cosh_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 18, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 18, float %{{.*}}) #[[#ATTR:]]
   %elt.cosh = call float @llvm.cosh.f32(float %a)
   ret float %elt.cosh
 }
 
 define noundef half @cosh_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 18, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 18, half %{{.*}}) #[[#ATTR]]
   %elt.cosh = call half @llvm.cosh.f16(half %a)
   ret half %elt.cosh
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @cosh_float4(<4 x float> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.cosh.f16(half)
 declare float @llvm.cosh.f32(float)
 declare <4 x float> @llvm.cosh.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/countbits.ll b/llvm/test/CodeGen/DirectX/countbits.ll
index f03ab9c5e79c35..f1f509ce522ddd 100644
--- a/llvm/test/CodeGen/DirectX/countbits.ll
+++ b/llvm/test/CodeGen/DirectX/countbits.ll
@@ -4,7 +4,7 @@
 
 define noundef i16 @test_countbits_short(i16 noundef %a) {
 entry:
-; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}}) #[[#ATTR:]]
 ; CHECK-NEXT: [[B:%.*]] = trunc i32 [[A]] to i16
 ; CHECK-NEXT ret i16 [[B]]
   %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
@@ -13,7 +13,7 @@ entry:
 
 define noundef i32 @test_countbits_short2(i16 noundef %a) {
 entry:
-; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}}) #[[#ATTR]]
 ; CHECK-NEXT: ret i32 [[A]]
   %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
   %elt.zext = zext i16 %elt.ctpop to i32
@@ -22,7 +22,7 @@ entry:
 
 define noundef i32 @test_countbits_short3(i16 noundef %a) {
 entry:
-; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}}) #[[#ATTR]]
 ; CHECK-NEXT: ret i32 [[A]]
   %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
   %elt.sext = sext i16 %elt.ctpop to i32
@@ -31,7 +31,7 @@ entry:
 
 define noundef i32 @test_countbits_int(i32 noundef %a) {
 entry:
-; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 %{{.*}}) #[[#ATTR]]
 ; CHECK-NEXT: ret i32 [[A]]
   %elt.ctpop = call i32 @llvm.ctpop.i32(i32 %a)
   ret i32 %elt.ctpop
@@ -39,7 +39,7 @@ entry:
 
 define noundef i64 @test_countbits_long(i64 noundef %a) {
 entry:
-; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i64(i32 31, i64 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i64(i32 31, i64 %{{.*}}) #[[#ATTR]]
 ; CHECK-NEXT: [[B:%.*]] = zext i32 [[A]] to i64
 ; CHECK-NEXT ret i64 [[B]]
   %elt.ctpop = call i64 @llvm.ctpop.i64(i64 %a)
@@ -48,7 +48,7 @@ entry:
 
 define noundef i32 @test_countbits_long2(i64 noundef %a) {
 entry:
-; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i64(i32 31, i64 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i64(i32 31, i64 %{{.*}}) #[[#ATTR]]
 ; CHECK-NEXT: ret i32 [[A]]
   %elt.ctpop = call i64 @llvm.ctpop.i64(i64 %a)
   %elt.trunc = trunc i64 %elt.ctpop to i32
@@ -58,13 +58,13 @@ entry:
 define noundef <4 x i32> @countbits_vec4_i32(<4 x i32> noundef %a)  {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
@@ -73,6 +73,8 @@ entry:
   ret <4 x i32> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.ctpop.i16(i16)
 declare i32 @llvm.ctpop.i32(i32)
 declare i64 @llvm.ctpop.i64(i64)
diff --git a/llvm/test/CodeGen/DirectX/dot4add_i8packed.ll b/llvm/test/CodeGen/DirectX/dot4add_i8packed.ll
index 7df0520505cea6..63d2873cb46e25 100644
--- a/llvm/test/CodeGen/DirectX/dot4add_i8packed.ll
+++ b/llvm/test/CodeGen/DirectX/dot4add_i8packed.ll
@@ -2,9 +2,11 @@
 
 define void @main(i32 %a, i32 %b, i32 %c) {
 entry:
-; CHECK: call i32 @dx.op.dot4AddPacked(i32 163, i32 %a, i32 %b, i32 %c)
+; CHECK: call i32 @dx.op.dot4AddPacked(i32 163, i32 %a, i32 %b, i32 %c) #[[#ATTR:]]
   %0 = call i32 @llvm.dx.dot4add.i8packed(i32 %a, i32 %b, i32 %c)
   ret void
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i32 @llvm.dx.dot4add.i8packed(i32, i32, i32)
diff --git a/llvm/test/CodeGen/DirectX/dot4add_u8packed.ll b/llvm/test/CodeGen/DirectX/dot4add_u8packed.ll
index 3836b4a4bc16c9..a9828bafddaab5 100644
--- a/llvm/test/CodeGen/DirectX/dot4add_u8packed.ll
+++ b/llvm/test/CodeGen/DirectX/dot4add_u8packed.ll
@@ -2,9 +2,11 @@
 
 define void @main(i32 %a, i32 %b, i32 %c) {
 entry:
-; CHECK: call i32 @dx.op.dot4AddPacked(i32 164, i32 %a, i32 %b, i32 %c)
+; CHECK: call i32 @dx.op.dot4AddPacked(i32 164, i32 %a, i32 %b, i32 %c) #[[#ATTR:]]
   %0 = call i32 @llvm.dx.dot4add.u8packed(i32 %a, i32 %b, i32 %c)
   ret void
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i32 @llvm.dx.dot4add.u8packed(i32, i32, i32)
diff --git a/llvm/test/CodeGen/DirectX/exp.ll b/llvm/test/CodeGen/DirectX/exp.ll
index c2d9938d27ecda..7a707e36bf9f12 100644
--- a/llvm/test/CodeGen/DirectX/exp.ll
+++ b/llvm/test/CodeGen/DirectX/exp.ll
@@ -4,7 +4,7 @@
 
 ; CHECK-LABEL: exp_float
 ; CHECK: fmul float 0x3FF7154760000000, %{{.*}}
-; CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}})
+; CHECK: call float @dx.op.unary.f32(i32 21, float %{{.*}}) #[[#ATTR:]]
 define noundef float @exp_float(float noundef %a) {
 entry:
   %a.addr = alloca float, align 4
@@ -16,7 +16,7 @@ entry:
 
 ; CHECK-LABEL: exp_half
 ; CHECK: fmul half 0xH3DC5, %{{.*}}
-; CHECK: call half @dx.op.unary.f16(i32 21, half %{{.*}})
+; CHECK: call half @dx.op.unary.f16(i32 21, half %{{.*}}) #[[#ATTR]]
 ; Function Attrs: noinline nounwind optnone
 define noundef half @exp_half(half noundef %a) {
 entry:
@@ -27,5 +27,7 @@ entry:
   ret half %elt.exp
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.exp.f16(half)
 declare float @llvm.exp.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/fdot.ll b/llvm/test/CodeGen/DirectX/fdot.ll
index 78e111c41feefd..c6f36087ba91df 100644
--- a/llvm/test/CodeGen/DirectX/fdot.ll
+++ b/llvm/test/CodeGen/DirectX/fdot.ll
@@ -10,7 +10,7 @@ entry:
 ; DOPCHECK: extractelement <2 x half> %a, i32 1
 ; DOPCHECK: extractelement <2 x half> %b, i32 0
 ; DOPCHECK: extractelement <2 x half> %b, i32 1
-; DOPCHECK: call half @dx.op.dot2.f16(i32 54, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+; DOPCHECK: call half @dx.op.dot2.f16(i32 54, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) #[[#ATTR:]]
 ; EXPCHECK: call half @llvm.dx.dot2.v2f16(<2 x half> %a, <2 x half> %b)
   %dx.dot = call half @llvm.dx.fdot.v2f16(<2 x half> %a, <2 x half> %b)
   ret half %dx.dot
@@ -25,7 +25,7 @@ entry:
 ; DOPCHECK: extractelement <3 x half> %b, i32 0
 ; DOPCHECK: extractelement <3 x half> %b, i32 1
 ; DOPCHECK: extractelement <3 x half> %b, i32 2
-; DOPCHECK: call half @dx.op.dot3.f16(i32 55, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+; DOPCHECK: call half @dx.op.dot3.f16(i32 55, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call half @llvm.dx.dot3.v3f16(<3 x half> %a, <3 x half> %b)
   %dx.dot = call half @llvm.dx.fdot.v3f16(<3 x half> %a, <3 x half> %b)
   ret half %dx.dot
@@ -42,7 +42,7 @@ entry:
 ; DOPCHECK: extractelement <4 x half> %b, i32 1
 ; DOPCHECK: extractelement <4 x half> %b, i32 2
 ; DOPCHECK: extractelement <4 x half> %b, i32 3
-; DOPCHECK: call half @dx.op.dot4.f16(i32 56, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+; DOPCHECK: call half @dx.op.dot4.f16(i32 56, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call half @llvm.dx.dot4.v4f16(<4 x half> %a, <4 x half> %b)
   %dx.dot = call half @llvm.dx.fdot.v4f16(<4 x half> %a, <4 x half> %b)
   ret half %dx.dot
@@ -55,7 +55,7 @@ entry:
 ; DOPCHECK: extractelement <2 x float> %a, i32 1
 ; DOPCHECK: extractelement <2 x float> %b, i32 0
 ; DOPCHECK: extractelement <2 x float> %b, i32 1
-; DOPCHECK: call float @dx.op.dot2.f32(i32 54, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+; DOPCHECK: call float @dx.op.dot2.f32(i32 54, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call float @llvm.dx.dot2.v2f32(<2 x float> %a, <2 x float> %b)
   %dx.dot = call float @llvm.dx.fdot.v2f32(<2 x float> %a, <2 x float> %b)
   ret float %dx.dot
@@ -70,7 +70,7 @@ entry:
 ; DOPCHECK: extractelement <3 x float> %b, i32 0
 ; DOPCHECK: extractelement <3 x float> %b, i32 1
 ; DOPCHECK: extractelement <3 x float> %b, i32 2
-; DOPCHECK: call float @dx.op.dot3.f32(i32 55, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+; DOPCHECK: call float @dx.op.dot3.f32(i32 55, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call float @llvm.dx.dot3.v3f32(<3 x float> %a, <3 x float> %b)
   %dx.dot = call float @llvm.dx.fdot.v3f32(<3 x float> %a, <3 x float> %b)
   ret float %dx.dot
@@ -87,12 +87,14 @@ entry:
 ; DOPCHECK: extractelement <4 x float> %b, i32 1
 ; DOPCHECK: extractelement <4 x float> %b, i32 2
 ; DOPCHECK: extractelement <4 x float> %b, i32 3
-; DOPCHECK: call float @dx.op.dot4.f32(i32 56, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+; DOPCHECK: call float @dx.op.dot4.f32(i32 56, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call float @llvm.dx.dot4.v4f32(<4 x float> %a, <4 x float> %b)
   %dx.dot = call float @llvm.dx.fdot.v4f32(<4 x float> %a, <4 x float> %b)
   ret float %dx.dot
 }
 
+; DOPCHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half  @llvm.dx.fdot.v2f16(<2 x half> , <2 x half> )
 declare half  @llvm.dx.fdot.v3f16(<3 x half> , <3 x half> )
 declare half  @llvm.dx.fdot.v4f16(<4 x half> , <4 x half> )
diff --git a/llvm/test/CodeGen/DirectX/firstbithigh.ll b/llvm/test/CodeGen/DirectX/firstbithigh.ll
index 5584c433fb6f0e..794b0f20a02697 100644
--- a/llvm/test/CodeGen/DirectX/firstbithigh.ll
+++ b/llvm/test/CodeGen/DirectX/firstbithigh.ll
@@ -4,42 +4,42 @@
 
 define noundef i32 @test_firstbithigh_ushort(i16 noundef %a) {
 entry:
-; CHECK: call i32 @dx.op.unaryBits.i16(i32 33, i16 %{{.*}})
+; CHECK: call i32 @dx.op.unaryBits.i16(i32 33, i16 %{{.*}}) #[[#ATTR:]]
   %elt.firstbithigh = call i32 @llvm.dx.firstbituhigh.i16(i16 %a)
   ret i32 %elt.firstbithigh
 }
 
 define noundef i32 @test_firstbithigh_short(i16 noundef %a) {
 entry:
-; CHECK: call i32 @dx.op.unaryBits.i16(i32 34, i16 %{{.*}})
+; CHECK: call i32 @dx.op.unaryBits.i16(i32 34, i16 %{{.*}}) #[[#ATTR]]
   %elt.firstbithigh = call i32 @llvm.dx.firstbitshigh.i16(i16 %a)
   ret i32 %elt.firstbithigh
 }
 
 define noundef i32 @test_firstbithigh_uint(i32 noundef %a) {
 entry:
-; CHECK: call i32 @dx.op.unaryBits.i32(i32 33, i32 %{{.*}})
+; CHECK: call i32 @dx.op.unaryBits.i32(i32 33, i32 %{{.*}}) #[[#ATTR]]
   %elt.firstbithigh = call i32 @llvm.dx.firstbituhigh.i32(i32 %a)
   ret i32 %elt.firstbithigh
 }
 
 define noundef i32 @test_firstbithigh_int(i32 noundef %a) {
 entry:
-; CHECK: call i32 @dx.op.unaryBits.i32(i32 34, i32 %{{.*}})
+; CHECK: call i32 @dx.op.unaryBits.i32(i32 34, i32 %{{.*}}) #[[#ATTR]]
   %elt.firstbithigh = call i32 @llvm.dx.firstbitshigh.i32(i32 %a)
   ret i32 %elt.firstbithigh
 }
 
 define noundef i32 @test_firstbithigh_ulong(i64 noundef %a) {
 entry:
-; CHECK: call i32 @dx.op.unaryBits.i64(i32 33, i64 %{{.*}})
+; CHECK: call i32 @dx.op.unaryBits.i64(i32 33, i64 %{{.*}}) #[[#ATTR]]
   %elt.firstbithigh = call i32 @llvm.dx.firstbituhigh.i64(i64 %a)
   ret i32 %elt.firstbithigh
 }
 
 define noundef i32 @test_firstbithigh_long(i64 noundef %a) {
 entry:
-; CHECK: call i32 @dx.op.unaryBits.i64(i32 34, i64 %{{.*}})
+; CHECK: call i32 @dx.op.unaryBits.i64(i32 34, i64 %{{.*}}) #[[#ATTR]]
   %elt.firstbithigh = call i32 @llvm.dx.firstbitshigh.i64(i64 %a)
   ret i32 %elt.firstbithigh
 }
@@ -47,13 +47,13 @@ entry:
 define noundef <4 x i32> @test_firstbituhigh_vec4_i32(<4 x i32> noundef %a)  {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
@@ -65,13 +65,13 @@ entry:
 define noundef <4 x i32> @test_firstbitshigh_vec4_i32(<4 x i32> noundef %a)  {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
@@ -80,6 +80,8 @@ entry:
   ret <4 x i32> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i32 @llvm.dx.firstbituhigh.i16(i16)
 declare i32 @llvm.dx.firstbituhigh.i32(i32)
 declare i32 @llvm.dx.firstbituhigh.i64(i64)
diff --git a/llvm/test/CodeGen/DirectX/floor.ll b/llvm/test/CodeGen/DirectX/floor.ll
index eaab3988c2c703..e82f23628c3169 100644
--- a/llvm/test/CodeGen/DirectX/floor.ll
+++ b/llvm/test/CodeGen/DirectX/floor.ll
@@ -4,14 +4,14 @@
 
 define noundef float @floor_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 27, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 27, float %{{.*}}) #[[#ATTR:]]
   %elt.floor = call float @llvm.floor.f32(float %a)
   ret float %elt.floor
 }
 
 define noundef half @floor_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 27, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 27, half %{{.*}}) #[[#ATTR]]
   %elt.floor = call half @llvm.floor.f16(half %a)
   ret half %elt.floor
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @floor_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.floor.f16(half)
 declare float @llvm.floor.f32(float)
 declare <4 x float> @llvm.floor.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/fmad.ll b/llvm/test/CodeGen/DirectX/fmad.ll
index e1f4e5cd50c4f0..868a3dd1503b58 100644
--- a/llvm/test/CodeGen/DirectX/fmad.ll
+++ b/llvm/test/CodeGen/DirectX/fmad.ll
@@ -1,10 +1,11 @@
 ; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
 
 ; Make sure dxil operation function calls for round are generated for float and half.
-; CHECK:call half @dx.op.tertiary.f16(i32 46, half %{{.*}}, half %{{.*}}, half %{{.*}})
-; CHECK:call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}})
-; CHECK:call double @dx.op.tertiary.f64(i32 46, double %{{.*}}, double %{{.*}}, double %{{.*}})
+; CHECK:call half @dx.op.tertiary.f16(i32 46, half %{{.*}}, half %{{.*}}, half %{{.*}}) #[[#ATTR:]]
+; CHECK:call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
+; CHECK:call double @dx.op.tertiary.f64(i32 46, double %{{.*}}, double %{{.*}}, double %{{.*}}) #[[#ATTR]]
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-pc-shadermodel6.7-library"
diff --git a/llvm/test/CodeGen/DirectX/fmax.ll b/llvm/test/CodeGen/DirectX/fmax.ll
index 05852ee33486d1..60dba72836e233 100644
--- a/llvm/test/CodeGen/DirectX/fmax.ll
+++ b/llvm/test/CodeGen/DirectX/fmax.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL:test_fmax_half
 define noundef half @test_fmax_half(half noundef %a, half noundef %b) {
 entry:
-; CHECK: call half @dx.op.binary.f16(i32 35, half %{{.*}}, half %{{.*}})
+; CHECK: call half @dx.op.binary.f16(i32 35, half %{{.*}}, half %{{.*}}) #[[#ATTR:]]
   %0 = call half @llvm.maxnum.f16(half %a, half %b)
   ret half %0
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL:test_fmax_float
 define noundef float @test_fmax_float(float noundef %a, float noundef %b) {
 entry:
-; CHECK: call float @dx.op.binary.f32(i32 35, float %{{.*}}, float %{{.*}})
+; CHECK: call float @dx.op.binary.f32(i32 35, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
   %0 = call float @llvm.maxnum.f32(float %a, float %b)
   ret float %0
 }
@@ -21,11 +21,13 @@ entry:
 ; CHECK-LABEL:test_fmax_double
 define noundef double @test_fmax_double(double noundef %a, double noundef %b) {
 entry:
-; CHECK: call double @dx.op.binary.f64(i32 35, double %{{.*}}, double %{{.*}})
+; CHECK: call double @dx.op.binary.f64(i32 35, double %{{.*}}, double %{{.*}}) #[[#ATTR]]
   %0 = call double @llvm.maxnum.f64(double %a, double %b)
   ret double %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.maxnum.f16(half, half)
 declare float @llvm.maxnum.f32(float, float)
 declare double @llvm.maxnum.f64(double, double)
diff --git a/llvm/test/CodeGen/DirectX/fmin.ll b/llvm/test/CodeGen/DirectX/fmin.ll
index 1c6c7ca3f2e38a..d592b7c26fbfb1 100644
--- a/llvm/test/CodeGen/DirectX/fmin.ll
+++ b/llvm/test/CodeGen/DirectX/fmin.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL:test_fmin_half
 define noundef half @test_fmin_half(half noundef %a, half noundef %b) {
 entry:
-; CHECK: call half @dx.op.binary.f16(i32 36, half %{{.*}}, half %{{.*}})
+; CHECK: call half @dx.op.binary.f16(i32 36, half %{{.*}}, half %{{.*}}) #[[#ATTR:]]
   %0 = call half @llvm.minnum.f16(half %a, half %b)
   ret half %0
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL:test_fmin_float
 define noundef float @test_fmin_float(float noundef %a, float noundef %b) {
 entry:
-; CHECK: call float @dx.op.binary.f32(i32 36, float %{{.*}}, float %{{.*}})
+; CHECK: call float @dx.op.binary.f32(i32 36, float %{{.*}}, float %{{.*}}) #[[#ATTR]]
   %0 = call float @llvm.minnum.f32(float %a, float %b)
   ret float %0
 }
@@ -21,11 +21,13 @@ entry:
 ; CHECK-LABEL:test_fmin_double
 define noundef double @test_fmin_double(double noundef %a, double noundef %b) {
 entry:
-; CHECK: call double @dx.op.binary.f64(i32 36, double %{{.*}}, double %{{.*}})
+; CHECK: call double @dx.op.binary.f64(i32 36, double %{{.*}}, double %{{.*}}) #[[#ATTR]]
   %0 = call double @llvm.minnum.f64(double %a, double %b)
   ret double %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.minnum.f16(half, half)
 declare float @llvm.minnum.f32(float, float)
 declare double @llvm.minnum.f64(double, double)
diff --git a/llvm/test/CodeGen/DirectX/frac.ll b/llvm/test/CodeGen/DirectX/frac.ll
index ef24527ce837b0..e9858287b7b531 100644
--- a/llvm/test/CodeGen/DirectX/frac.ll
+++ b/llvm/test/CodeGen/DirectX/frac.ll
@@ -7,7 +7,7 @@ define noundef half @frac_half(half noundef %a) {
 ; CHECK-LABEL: define noundef half @frac_half(
 ; CHECK-SAME: half noundef [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[DX_FRAC1:%.*]] = call half @dx.op.unary.f16(i32 22, half [[A]])
+; CHECK-NEXT:    [[DX_FRAC1:%.*]] = call half @dx.op.unary.f16(i32 22, half [[A]]) #[[#ATTR:]]
 ; CHECK-NEXT:    ret half [[DX_FRAC1]]
 ;
 entry:
@@ -19,7 +19,7 @@ define noundef float @frac_float(float noundef %a) #0 {
 ; CHECK-LABEL: define noundef float @frac_float(
 ; CHECK-SAME: float noundef [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[DX_FRAC1:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A]])
+; CHECK-NEXT:    [[DX_FRAC1:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A]]) #[[#ATTR]]
 ; CHECK-NEXT:    ret float [[DX_FRAC1]]
 ;
 entry:
@@ -32,13 +32,13 @@ define noundef <4 x float> @frac_float4(<4 x float> noundef %a) #0 {
 ; CHECK-SAME: <4 x float> noundef [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x float> [[A]], i64 0
-; CHECK-NEXT:    [[DOTI04:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I0]])
+; CHECK-NEXT:    [[DOTI04:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I0]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x float> [[A]], i64 1
-; CHECK-NEXT:    [[DOTI13:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I1]])
+; CHECK-NEXT:    [[DOTI13:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I1]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x float> [[A]], i64 2
-; CHECK-NEXT:    [[DOTI22:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I2]])
+; CHECK-NEXT:    [[DOTI22:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I2]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x float> [[A]], i64 3
-; CHECK-NEXT:    [[DOTI31:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I3]])
+; CHECK-NEXT:    [[DOTI31:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I3]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[DOTUPTO0:%.*]] = insertelement <4 x float> poison, float [[DOTI04]], i64 0
 ; CHECK-NEXT:    [[DOTUPTO1:%.*]] = insertelement <4 x float> [[DOTUPTO0]], float [[DOTI13]], i64 1
 ; CHECK-NEXT:    [[DOTUPTO2:%.*]] = insertelement <4 x float> [[DOTUPTO1]], float [[DOTI22]], i64 2
@@ -50,6 +50,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half  @llvm.dx.frac.f16(half)
 declare float @llvm.dx.frac.f32(float)
 declare <4 x float> @llvm.dx.frac.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/idot.ll b/llvm/test/CodeGen/DirectX/idot.ll
index 26e7ff395bd16b..8a89d5d3a7a9bc 100644
--- a/llvm/test/CodeGen/DirectX/idot.ll
+++ b/llvm/test/CodeGen/DirectX/idot.ll
@@ -12,7 +12,7 @@ entry:
 ; CHECK: extractelement <2 x i16> %a, i64 1
 ; CHECK: extractelement <2 x i16> %b, i64 1
 ; EXPCHECK: call i16 @llvm.dx.imad.i16(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
-; DOPCHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
+; DOPCHECK: call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
   %dot = call i16 @llvm.dx.sdot.v3i16(<2 x i16> %a, <2 x i16> %b)
   ret i16 %dot
 }
@@ -26,15 +26,15 @@ entry:
 ; CHECK: extractelement <4 x i32> %a, i64 1
 ; CHECK: extractelement <4 x i32> %b, i64 1
 ; EXPCHECK: call i32 @llvm.dx.imad.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
 ; CHECK: extractelement <4 x i32> %a, i64 2
 ; CHECK: extractelement <4 x i32> %b, i64 2
 ; EXPCHECK: call i32 @llvm.dx.imad.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
 ; CHECK: extractelement <4 x i32> %a, i64 3
 ; CHECK: extractelement <4 x i32> %b, i64 3
 ; EXPCHECK: call i32 @llvm.dx.imad.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
   %dot = call i32 @llvm.dx.sdot.v4i32(<4 x i32> %a, <4 x i32> %b)
   ret i32 %dot
 }
@@ -48,11 +48,11 @@ entry:
 ; CHECK: extractelement <3 x i16> %a, i64 1
 ; CHECK: extractelement <3 x i16> %b, i64 1
 ; EXPCHECK: call i16 @llvm.dx.umad.i16(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
-; DOPCHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
+; DOPCHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
 ; CHECK: extractelement <3 x i16> %a, i64 2
 ; CHECK: extractelement <3 x i16> %b, i64 2
 ; EXPCHECK: call i16 @llvm.dx.umad.i16(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
-; DOPCHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
+; DOPCHECK: call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR]]
   %dot = call i16 @llvm.dx.udot.v3i16(<3 x i16> %a, <3 x i16> %b)
   ret i16 %dot
 }
@@ -66,15 +66,15 @@ entry:
 ; CHECK: extractelement <4 x i32> %a, i64 1
 ; CHECK: extractelement <4 x i32> %b, i64 1
 ; EXPCHECK: call i32 @llvm.dx.umad.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
 ; CHECK: extractelement <4 x i32> %a, i64 2
 ; CHECK: extractelement <4 x i32> %b, i64 2
 ; EXPCHECK: call i32 @llvm.dx.umad.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
 ; CHECK: extractelement <4 x i32> %a, i64 3
 ; CHECK: extractelement <4 x i32> %b, i64 3
 ; EXPCHECK: call i32 @llvm.dx.umad.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+; DOPCHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
   %dot = call i32 @llvm.dx.udot.v4i32(<4 x i32> %a, <4 x i32> %b)
   ret i32 %dot
 }
@@ -88,11 +88,13 @@ entry:
 ; CHECK: extractelement <2 x i64> %a, i64 1
 ; CHECK: extractelement <2 x i64> %b, i64 1
 ; EXPCHECK: call i64 @llvm.dx.umad.i64(i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
-; DOPCHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
+; DOPCHECK: call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
   %dot = call i64 @llvm.dx.udot.v2i64(<2 x i64> %a, <2 x i64> %b)
   ret i64 %dot
 }
 
+; DOPCHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.dx.sdot.v2i16(<2 x i16>, <2 x i16>)
 declare i32 @llvm.dx.sdot.v4i32(<4 x i32>, <4 x i32>)
 declare i16 @llvm.dx.udot.v3i32(<3 x i16>, <3 x i16>)
diff --git a/llvm/test/CodeGen/DirectX/imad.ll b/llvm/test/CodeGen/DirectX/imad.ll
index 5b818f86bc7f25..5d9463d658cf56 100644
--- a/llvm/test/CodeGen/DirectX/imad.ll
+++ b/llvm/test/CodeGen/DirectX/imad.ll
@@ -1,9 +1,11 @@
 ; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
 
 ; Make sure dxil operation function calls for round are generated for float and half.
-; CHECK:call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
-; CHECK:call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; CHECK:call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
+; CHECK:call i16 @dx.op.tertiary.i16(i32 48, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
+; CHECK:call i32 @dx.op.tertiary.i32(i32 48, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+; CHECK:call i64 @dx.op.tertiary.i64(i32 48, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-pc-shadermodel6.7-library"
diff --git a/llvm/test/CodeGen/DirectX/isinf.ll b/llvm/test/CodeGen/DirectX/isinf.ll
index 03a00c40498d5a..2bd83e94b9708b 100644
--- a/llvm/test/CodeGen/DirectX/isinf.ll
+++ b/llvm/test/CodeGen/DirectX/isinf.ll
@@ -4,18 +4,19 @@
 
 define noundef i1 @isinf_float(float noundef %a) {
 entry:
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f32(i32 9, float %{{.*}})
+  ; CHECK: call i1 @dx.op.isSpecialFloat.f32(i32 9, float %{{.*}}) #[[#ATTR:]]
   %dx.isinf = call i1 @llvm.dx.isinf.f32(float %a)
   ret i1 %dx.isinf
 }
 
 define noundef i1 @isinf_half(half noundef %a) {
 entry:
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}})
+  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) #[[#ATTR]]
   %dx.isinf = call i1 @llvm.dx.isinf.f16(half %a)
   ret i1 %dx.isinf
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
 
 declare i1 @llvm.dx.isinf.f16(half)
 declare i1 @llvm.dx.isinf.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/log.ll b/llvm/test/CodeGen/DirectX/log.ll
index 195713309cd448..d3894137619203 100644
--- a/llvm/test/CodeGen/DirectX/log.ll
+++ b/llvm/test/CodeGen/DirectX/log.ll
@@ -5,7 +5,7 @@
 
 define noundef float @log_float(float noundef %a) #0 {
 entry:
-; DOPCHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}})
+; DOPCHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) #[[#ATTR:]]
 ; EXPCHECK: call float @llvm.log2.f32(float %a)
 ; CHECK: fmul float 0x3FE62E4300000000, %{{.*}}
   %elt.log = call float @llvm.log.f32(float %a)
@@ -14,12 +14,14 @@ entry:
 
 define noundef half @log_half(half noundef %a) #0 {
 entry:
-; DOPCHECK: call half @dx.op.unary.f16(i32 23, half %{{.*}})
+; DOPCHECK: call half @dx.op.unary.f16(i32 23, half %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call half @llvm.log2.f16(half %a)
 ; CHECK: fmul half 0xH398C, %{{.*}}
   %elt.log = call half @llvm.log.f16(half %a)
   ret half %elt.log
 }
 
+; DOPCHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.log.f16(half)
 declare float @llvm.log.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/log10.ll b/llvm/test/CodeGen/DirectX/log10.ll
index f3acccce7e451a..3f40f80310ce29 100644
--- a/llvm/test/CodeGen/DirectX/log10.ll
+++ b/llvm/test/CodeGen/DirectX/log10.ll
@@ -5,7 +5,7 @@
 
 define noundef float @log10_float(float noundef %a) #0 {
 entry:
-; DOPCHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}})
+; DOPCHECK: call float @dx.op.unary.f32(i32 23, float %{{.*}}) #[[#ATTR:]]
 ; EXPCHECK: call float @llvm.log2.f32(float %a)
 ; CHECK: fmul float 0x3FD3441340000000, %{{.*}}
   %elt.log10 = call float @llvm.log10.f32(float %a)
@@ -14,12 +14,14 @@ entry:
 
 define noundef half @log10_half(half noundef %a) #0 {
 entry:
-; DOPCHECK: call half @dx.op.unary.f16(i32 23, half %{{.*}})
+; DOPCHECK: call half @dx.op.unary.f16(i32 23, half %{{.*}}) #[[#ATTR]]
 ; EXPCHECK: call half @llvm.log2.f16(half %a)
 ; CHECK: fmul half 0xH34D1, %{{.*}}
   %elt.log10 = call half @llvm.log10.f16(half %a)
   ret half %elt.log10
 }
 
+; DOPCHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.log10.f16(half)
 declare float @llvm.log10.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/log2.ll b/llvm/test/CodeGen/DirectX/log2.ll
index d6a7ba0b7dda75..eaf1183a2c810c 100644
--- a/llvm/test/CodeGen/DirectX/log2.ll
+++ b/llvm/test/CodeGen/DirectX/log2.ll
@@ -4,17 +4,19 @@
 
 define noundef float @log2_float(float noundef %a) #0 {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 23, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 23, float %{{.*}}) #[[#ATTR:]]
   %elt.log2 = call float @llvm.log2.f32(float %a)
   ret float %elt.log2
 }
 
 define noundef half @log2_half(half noundef %a) #0 {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 23, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 23, half %{{.*}}) #[[#ATTR]]
   %elt.log2 = call half @llvm.log2.f16(half %a)
   ret half %elt.log2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.log2.f16(half)
 declare float @llvm.log2.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/reversebits.ll b/llvm/test/CodeGen/DirectX/reversebits.ll
index a79b901408cf21..acd00b8d9b8d53 100644
--- a/llvm/test/CodeGen/DirectX/reversebits.ll
+++ b/llvm/test/CodeGen/DirectX/reversebits.ll
@@ -5,7 +5,7 @@
 ; Function Attrs: nounwind
 define noundef i16 @test_bitreverse_short(i16 noundef %a) {
 entry:
-; CHECK:call i16 @dx.op.unary.i16(i32 30, i16 %{{.*}})
+; CHECK:call i16 @dx.op.unary.i16(i32 30, i16 %{{.*}}) #[[#ATTR:]]
   %elt.bitreverse = call i16 @llvm.bitreverse.i16(i16 %a)
   ret i16 %elt.bitreverse
 }
@@ -13,7 +13,7 @@ entry:
 ; Function Attrs: nounwind
 define noundef i32 @test_bitreverse_int(i32 noundef %a) {
 entry:
-; CHECK:call i32 @dx.op.unary.i32(i32 30, i32 %{{.*}})
+; CHECK:call i32 @dx.op.unary.i32(i32 30, i32 %{{.*}}) #[[#ATTR]]
   %elt.bitreverse = call i32 @llvm.bitreverse.i32(i32 %a)
   ret i32 %elt.bitreverse
 }
@@ -21,7 +21,7 @@ entry:
 ; Function Attrs: nounwind
 define noundef i64 @test_bitreverse_long(i64 noundef %a) {
 entry:
-; CHECK:call i64 @dx.op.unary.i64(i32 30, i64 %{{.*}})
+; CHECK:call i64 @dx.op.unary.i64(i32 30, i64 %{{.*}}) #[[#ATTR]]
   %elt.bitreverse = call i64 @llvm.bitreverse.i64(i64 %a)
   ret i64 %elt.bitreverse
 }
@@ -29,13 +29,13 @@ entry:
 define noundef <4 x i32> @bitreverse_int324(<4 x i32> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
@@ -44,6 +44,8 @@ entry:
   ret <4 x i32> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.bitreverse.i16(i16)
 declare i32 @llvm.bitreverse.i32(i32)
 declare i64 @llvm.bitreverse.i64(i64)
diff --git a/llvm/test/CodeGen/DirectX/round.ll b/llvm/test/CodeGen/DirectX/round.ll
index b08cbac5f42e91..165a201281b315 100644
--- a/llvm/test/CodeGen/DirectX/round.ll
+++ b/llvm/test/CodeGen/DirectX/round.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL: round_half
 define noundef half @round_half(half noundef %a) {
 entry:
-; CHECK: call half @dx.op.unary.f16(i32 26, half %{{.*}})
+; CHECK: call half @dx.op.unary.f16(i32 26, half %{{.*}}) #[[#ATTR:]]
   %elt.roundeven = call half @llvm.roundeven.f16(half %a)
   ret half %elt.roundeven
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL: round_float
 define noundef float @round_float(float noundef %a) {
 entry:
-; CHECK: call float @dx.op.unary.f32(i32 26, float %{{.*}})
+; CHECK: call float @dx.op.unary.f32(i32 26, float %{{.*}}) #[[#ATTR]]
   %elt.roundeven = call float @llvm.roundeven.f32(float %a)
   ret float %elt.roundeven
 }
@@ -21,13 +21,13 @@ entry:
 define noundef <4 x float> @round_float4(<4 x float> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -36,6 +36,7 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
 
 declare half @llvm.roundeven.f16(half)
 declare float @llvm.roundeven.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/rsqrt.ll b/llvm/test/CodeGen/DirectX/rsqrt.ll
index 612b6222e7594e..f755025201bf4e 100644
--- a/llvm/test/CodeGen/DirectX/rsqrt.ll
+++ b/llvm/test/CodeGen/DirectX/rsqrt.ll
@@ -7,7 +7,7 @@
 define noundef float @rsqrt_float(float noundef %a) {
 ; CHECK-SAME: float noundef [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[DX_RSQRT1:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A]])
+; CHECK-NEXT:    [[DX_RSQRT1:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A]]) #[[#ATTR:]]
 ; CHECK-NEXT:    ret float [[DX_RSQRT1]]
 ;
 entry:
@@ -19,7 +19,7 @@ entry:
 define noundef half @rsqrt_half(half noundef %a) {
 ; CHECK-SAME: half noundef [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[DX_RSQRT1:%.*]] = call half @dx.op.unary.f16(i32 25, half [[A]])
+; CHECK-NEXT:    [[DX_RSQRT1:%.*]] = call half @dx.op.unary.f16(i32 25, half [[A]]) #[[#ATTR]]
 ; CHECK-NEXT:    ret half [[DX_RSQRT1]]
 ;
 entry:
@@ -32,13 +32,13 @@ define noundef <4 x float> @rsqrt_float4(<4 x float> noundef %a) #0 {
 ; CHECK-SAME: <4 x float> noundef [[A:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x float> [[A]], i64 0
-; CHECK-NEXT:    [[DOTI04:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I0]])
+; CHECK-NEXT:    [[DOTI04:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I0]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x float> [[A]], i64 1
-; CHECK-NEXT:    [[DOTI13:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I1]])
+; CHECK-NEXT:    [[DOTI13:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I1]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x float> [[A]], i64 2
-; CHECK-NEXT:    [[DOTI22:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I2]])
+; CHECK-NEXT:    [[DOTI22:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I2]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x float> [[A]], i64 3
-; CHECK-NEXT:    [[DOTI31:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I3]])
+; CHECK-NEXT:    [[DOTI31:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I3]]) #[[#ATTR]]
 ; CHECK-NEXT:    [[DOTUPTO0:%.*]] = insertelement <4 x float> poison, float [[DOTI04]], i64 0
 ; CHECK-NEXT:    [[DOTUPTO1:%.*]] = insertelement <4 x float> [[DOTUPTO0]], float [[DOTI13]], i64 1
 ; CHECK-NEXT:    [[DOTUPTO2:%.*]] = insertelement <4 x float> [[DOTUPTO1]], float [[DOTI22]], i64 2
@@ -50,6 +50,7 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
 
 declare half @llvm.dx.rsqrt.f16(half)
 declare float @llvm.dx.rsqrt.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/saturate.ll b/llvm/test/CodeGen/DirectX/saturate.ll
index 404cab7b665d0e..0bb1e55421046d 100644
--- a/llvm/test/CodeGen/DirectX/saturate.ll
+++ b/llvm/test/CodeGen/DirectX/saturate.ll
@@ -4,7 +4,7 @@
 ; CHECK-LABEL: test_saturate_half
 define noundef half @test_saturate_half(half noundef %p0) {
 entry:
-  ; CHECK: call half @dx.op.unary.f16(i32 7, half %p0)
+  ; CHECK: call half @dx.op.unary.f16(i32 7, half %p0) #[[#ATTR:]]
   %hlsl.saturate = call half @llvm.dx.saturate.f16(half %p0)
   ; CHECK: ret half
   ret half %hlsl.saturate
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL: test_saturate_float
 define noundef float @test_saturate_float(float noundef %p0) {
 entry:
-  ; CHECK: call float @dx.op.unary.f32(i32 7, float %p0)
+  ; CHECK: call float @dx.op.unary.f32(i32 7, float %p0) #[[#ATTR]]
   %hlsl.saturate = call float @llvm.dx.saturate.f32(float %p0)
   ; CHECK: ret float
   ret float %hlsl.saturate
@@ -22,12 +22,14 @@ entry:
 ; CHECK-LABEL: test_saturate_double
 define noundef double @test_saturate_double(double noundef %p0) {
 entry:
-  ; CHECK: call double @dx.op.unary.f64(i32 7, double %p0)
+  ; CHECK: call double @dx.op.unary.f64(i32 7, double %p0) #[[#ATTR]]
   %hlsl.saturate = call double @llvm.dx.saturate.f64(double %p0)
   ; CHECK: ret double
   ret double %hlsl.saturate
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.dx.saturate.f16(half)
 declare float @llvm.dx.saturate.f32(float)
 declare double @llvm.dx.saturate.f64(double)
diff --git a/llvm/test/CodeGen/DirectX/sin.ll b/llvm/test/CodeGen/DirectX/sin.ll
index ac6b217be80e75..a403e7ce7ad032 100644
--- a/llvm/test/CodeGen/DirectX/sin.ll
+++ b/llvm/test/CodeGen/DirectX/sin.ll
@@ -4,14 +4,14 @@
 
 define noundef float @sin_float(float noundef %a) {
 entry:
-  ; CHECK:call float @dx.op.unary.f32(i32 13, float %{{.*}})
+  ; CHECK:call float @dx.op.unary.f32(i32 13, float %{{.*}}) #[[#ATTR:]]
   %1 = call float @llvm.sin.f32(float %a)
   ret float %1
 }
 
 define noundef half @sin_half(half noundef %a) {
 entry:
-  ; CHECK:call half @dx.op.unary.f16(i32 13, half %{{.*}})
+  ; CHECK:call half @dx.op.unary.f16(i32 13, half %{{.*}}) #[[#ATTR]]
   %1 = call half @llvm.sin.f16(half %a)
   ret half %1
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @sin_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.sin.f16(half)
 declare float @llvm.sin.f32(float)
 declare <4 x float> @llvm.sin.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/sinh.ll b/llvm/test/CodeGen/DirectX/sinh.ll
index deba726e8d9adc..5cbbdb09e9df0f 100644
--- a/llvm/test/CodeGen/DirectX/sinh.ll
+++ b/llvm/test/CodeGen/DirectX/sinh.ll
@@ -4,14 +4,14 @@
 
 define noundef float @sinh_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 19, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 19, float %{{.*}}) #[[#ATTR:]]
   %elt.sinh = call float @llvm.sinh.f32(float %a)
   ret float %elt.sinh
 }
 
 define noundef half @sinh_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 19, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 19, half %{{.*}}) #[[#ATTR]]
   %elt.sinh = call half @llvm.sinh.f16(half %a)
   ret half %elt.sinh
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @sinh_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.sinh.f16(half)
 declare float @llvm.sinh.f32(float)
 declare <4 x float> @llvm.sinh.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/smax.ll b/llvm/test/CodeGen/DirectX/smax.ll
index bcda51cb0bfba6..2165a11b7d606b 100644
--- a/llvm/test/CodeGen/DirectX/smax.ll
+++ b/llvm/test/CodeGen/DirectX/smax.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL:test_smax_i16
 define noundef i16 @test_smax_i16(i16 noundef %a, i16 noundef %b) {
 entry:
-; CHECK: call i16 @dx.op.binary.i16(i32 37, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 37, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
   %0 = call i16 @llvm.smax.i16(i16 %a, i16 %b)
   ret i16 %0
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL:test_smax_i32
 define noundef i32 @test_smax_i32(i32 noundef %a, i32 noundef %b) {
 entry:
-; CHECK: call i32 @dx.op.binary.i32(i32 37, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 37, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
   %0 = call i32 @llvm.smax.i32(i32 %a, i32 %b)
   ret i32 %0
 }
@@ -21,11 +21,13 @@ entry:
 ; CHECK-LABEL:test_smax_i64
 define noundef i64 @test_smax_i64(i64 noundef %a, i64 noundef %b) {
 entry:
-; CHECK: call i64 @dx.op.binary.i64(i32 37, i64 %{{.*}}, i64 %{{.*}})
+; CHECK: call i64 @dx.op.binary.i64(i32 37, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
   %0 = call i64 @llvm.smax.i64(i64 %a, i64 %b)
   ret i64 %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.smax.i16(i16, i16)
 declare i32 @llvm.smax.i32(i32, i32)
 declare i64 @llvm.smax.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/smin.ll b/llvm/test/CodeGen/DirectX/smin.ll
index 8d4884704df213..afa04532f15091 100644
--- a/llvm/test/CodeGen/DirectX/smin.ll
+++ b/llvm/test/CodeGen/DirectX/smin.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL:test_smin_i16
 define noundef i16 @test_smin_i16(i16 noundef %a, i16 noundef %b) {
 entry:
-; CHECK: call i16 @dx.op.binary.i16(i32 38, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 38, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
   %0 = call i16 @llvm.smin.i16(i16 %a, i16 %b)
   ret i16 %0
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL:test_smin_i32
 define noundef i32 @test_smin_i32(i32 noundef %a, i32 noundef %b) {
 entry:
-; CHECK: call i32 @dx.op.binary.i32(i32 38, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 38, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
   %0 = call i32 @llvm.smin.i32(i32 %a, i32 %b)
   ret i32 %0
 }
@@ -21,11 +21,13 @@ entry:
 ; CHECK-LABEL:test_smin_i64
 define noundef i64 @test_smin_i64(i64 noundef %a, i64 noundef %b) {
 entry:
-; CHECK: call i64 @dx.op.binary.i64(i32 38, i64 %{{.*}}, i64 %{{.*}})
+; CHECK: call i64 @dx.op.binary.i64(i32 38, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
   %0 = call i64 @llvm.smin.i64(i64 %a, i64 %b)
   ret i64 %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.smin.i16(i16, i16)
 declare i32 @llvm.smin.i32(i32, i32)
 declare i64 @llvm.smin.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/splitdouble.ll b/llvm/test/CodeGen/DirectX/splitdouble.ll
index 1443ba6269255a..97a9575c646339 100644
--- a/llvm/test/CodeGen/DirectX/splitdouble.ll
+++ b/llvm/test/CodeGen/DirectX/splitdouble.ll
@@ -5,7 +5,7 @@ define i32 @test_scalar(double noundef %D) {
 ; CHECK-LABEL: define i32 @test_scalar(
 ; CHECK-SAME: double noundef [[D:%.*]]) {
 ; NOLOWER-NEXT:    [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D]])
-; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D]]) #[[#ATTR:]]
 ; NOLOWER-NEXT:    [[EV1:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0
 ; NOLOWER-NEXT:    [[EV2:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 1
 ; WITHLOWER-NEXT:  [[EV1:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 0
@@ -26,10 +26,10 @@ define void @test_vector_double_split_void(<2 x double> noundef %d) {
 ; CHECK-SAME: <2 x double> noundef [[D:%.*]]) {
 ; CHECK-NEXT:      [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0
 ; NOLOWER-NEXT:    [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]])
-; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]]) #[[#ATTR]]
 ; CHECK-NEXT:      [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1
 ; NOLOWER-NEXT:    [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]])
-; WITHLOWER-NEXT:  [[HLSL_ASUINT_I1:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I1:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]]) #[[#ATTR]]
 ; CHECK-NEXT:      ret void
 ;
   %hlsl.asuint = call { <2 x i32>, <2 x i32> }  @llvm.dx.splitdouble.v2i32(<2 x double> %d)
@@ -41,13 +41,13 @@ define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) {
 ; CHECK-SAME: <3 x double> noundef [[D:%.*]]) {
 ; CHECK-NEXT:      [[D_I0:%.*]] = extractelement <3 x double> [[D]], i64 0
 ; NOLOWER-NEXT:    [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]])
-; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]]) #[[#ATTR]]
 ; CHECK-NEXT:      [[D_I1:%.*]] = extractelement <3 x double> [[D]], i64 1
 ; NOLOWER-NEXT:    [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]])
-; WITHLOWER-NEXT:  [[HLSL_ASUINT_I1:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I1:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]]) #[[#ATTR]]
 ; CHECK-NEXT:      [[D_I2:%.*]] = extractelement <3 x double> [[D]], i64 2
 ; NOLOWER-NEXT:    [[HLSL_ASUINT_I2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I2]])
-; WITHLOWER-NEXT:  [[HLSL_ASUINT_I2:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I2]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I2:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I2]]) #[[#ATTR]]
 ; NOLOWER-NEXT:    [[DOTELEM0:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0
 ; WITHLOWER-NEXT:  [[DOTELEM0:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 0
 ; NOLOWER-NEXT:    [[DOTELEM01:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 0
@@ -74,3 +74,5 @@ define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) {
   %3 = add <3 x i32> %1, %2
   ret <3 x i32> %3
 }
+
+; WITHLOWER: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
diff --git a/llvm/test/CodeGen/DirectX/sqrt.ll b/llvm/test/CodeGen/DirectX/sqrt.ll
index e2955b4efa2ec4..0bdbb24099aa7d 100644
--- a/llvm/test/CodeGen/DirectX/sqrt.ll
+++ b/llvm/test/CodeGen/DirectX/sqrt.ll
@@ -4,14 +4,14 @@
 
 define noundef float @sqrt_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 24, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 24, float %{{.*}}) #[[#ATTR:]]
   %elt.sqrt = call float @llvm.sqrt.f32(float %a)
   ret float %elt.sqrt
 }
 
 define noundef half @sqrt_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 24, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 24, half %{{.*}}) #[[#ATTR]]
   %elt.sqrt = call half @llvm.sqrt.f16(half %a)
   ret half %elt.sqrt
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @sqrt_float4(<4 x float> noundef %a) {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.sqrt.f16(half)
 declare float @llvm.sqrt.f32(float)
 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/tan.ll b/llvm/test/CodeGen/DirectX/tan.ll
index cf6965a95c04e1..753198bd110517 100644
--- a/llvm/test/CodeGen/DirectX/tan.ll
+++ b/llvm/test/CodeGen/DirectX/tan.ll
@@ -4,14 +4,14 @@
 
 define noundef float @tan_float(float noundef %a) #0 {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 14, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 14, float %{{.*}}) #[[#ATTR:]]
   %elt.tan = call float @llvm.tan.f32(float %a)
   ret float %elt.tan
 }
 
 define noundef half @tan_half(half noundef %a) #0 {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 14, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 14, half %{{.*}}) #[[#ATTR]]
   %elt.tan = call half @llvm.tan.f16(half %a)
   ret half %elt.tan
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @tan_float4(<4 x float> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.tan.f16(half)
 declare float @llvm.tan.f32(float)
 declare <4 x float> @llvm.tan.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/tanh.ll b/llvm/test/CodeGen/DirectX/tanh.ll
index 54ec6f29fa0c3c..14387508c2e244 100644
--- a/llvm/test/CodeGen/DirectX/tanh.ll
+++ b/llvm/test/CodeGen/DirectX/tanh.ll
@@ -4,14 +4,14 @@
 
 define noundef float @tan_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 20, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 20, float %{{.*}}) #[[#ATTR:]]
   %elt.tanh = call float @llvm.tanh.f32(float %a)
   ret float %elt.tanh
 }
 
 define noundef half @tan_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 20, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 20, half %{{.*}}) #[[#ATTR]]
   %elt.tanh = call half @llvm.tanh.f16(half %a)
   ret half %elt.tanh
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @tanh_float4(<4 x float> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.tanh.f16(half)
 declare float @llvm.tanh.f32(float)
 declare <4 x float> @llvm.tanh.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/trunc.ll b/llvm/test/CodeGen/DirectX/trunc.ll
index 6d9c222595c448..c8bdeb0d42dce2 100644
--- a/llvm/test/CodeGen/DirectX/trunc.ll
+++ b/llvm/test/CodeGen/DirectX/trunc.ll
@@ -4,14 +4,14 @@
 
 define noundef float @trunc_float(float noundef %a) {
 entry:
-; CHECK:call float @dx.op.unary.f32(i32 29, float %{{.*}})
+; CHECK:call float @dx.op.unary.f32(i32 29, float %{{.*}}) #[[#ATTR:]]
   %elt.trunc = call float @llvm.trunc.f32(float %a)
   ret float %elt.trunc
 }
 
 define noundef half @trunc_half(half noundef %a) {
 entry:
-; CHECK:call half @dx.op.unary.f16(i32 29, half %{{.*}})
+; CHECK:call half @dx.op.unary.f16(i32 29, half %{{.*}}) #[[#ATTR]]
   %elt.trunc = call half @llvm.trunc.f16(half %a)
   ret half %elt.trunc
 }
@@ -19,13 +19,13 @@ entry:
 define noundef <4 x float> @trunc_float4(<4 x float> noundef %a) #0 {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee0]]) #[[#ATTR]]
   ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee1]]) #[[#ATTR]]
   ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee2]]) #[[#ATTR]]
   ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee3]]) #[[#ATTR]]
   ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
   ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
@@ -34,6 +34,8 @@ entry:
   ret <4 x float> %2
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare half @llvm.trunc.f16(half)
 declare float @llvm.trunc.f32(float)
 declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/umad.ll b/llvm/test/CodeGen/DirectX/umad.ll
index 583fdddfe03f34..104d2380af66b7 100644
--- a/llvm/test/CodeGen/DirectX/umad.ll
+++ b/llvm/test/CodeGen/DirectX/umad.ll
@@ -1,9 +1,11 @@
 ; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
 
 ; Make sure dxil operation function calls for round are generated for float and half.
-; CHECK:call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}})
-; CHECK:call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-; CHECK:call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}})
+; CHECK:call i16 @dx.op.tertiary.i16(i32 49, i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
+; CHECK:call i32 @dx.op.tertiary.i32(i32 49, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
+; CHECK:call i64 @dx.op.tertiary.i64(i32 49, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
+
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-pc-shadermodel6.7-library"
diff --git a/llvm/test/CodeGen/DirectX/umax.ll b/llvm/test/CodeGen/DirectX/umax.ll
index a4bd66ef0bd6c3..623ca2715aeb75 100644
--- a/llvm/test/CodeGen/DirectX/umax.ll
+++ b/llvm/test/CodeGen/DirectX/umax.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL:test_umax_i16
 define noundef i16 @test_umax_i16(i16 noundef %a, i16 noundef %b) {
 entry:
-; CHECK: call i16 @dx.op.binary.i16(i32 39, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 39, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
   %0 = call i16 @llvm.umax.i16(i16 %a, i16 %b)
   ret i16 %0
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL:test_umax_i32
 define noundef i32 @test_umax_i32(i32 noundef %a, i32 noundef %b) {
 entry:
-; CHECK: call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
   %0 = call i32 @llvm.umax.i32(i32 %a, i32 %b)
   ret i32 %0
 }
@@ -21,11 +21,13 @@ entry:
 ; CHECK-LABEL:test_umax_i64
 define noundef i64 @test_umax_i64(i64 noundef %a, i64 noundef %b) {
 entry:
-; CHECK: call i64 @dx.op.binary.i64(i32 39, i64 %{{.*}}, i64 %{{.*}})
+; CHECK: call i64 @dx.op.binary.i64(i32 39, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
   %0 = call i64 @llvm.umax.i64(i64 %a, i64 %b)
   ret i64 %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.umax.i16(i16, i16)
 declare i32 @llvm.umax.i32(i32, i32)
 declare i64 @llvm.umax.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/umin.ll b/llvm/test/CodeGen/DirectX/umin.ll
index a551f8ff3bfa9d..7546accc4d3b6e 100644
--- a/llvm/test/CodeGen/DirectX/umin.ll
+++ b/llvm/test/CodeGen/DirectX/umin.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL:test_umin_i16
 define noundef i16 @test_umin_i16(i16 noundef %a, i16 noundef %b) {
 entry:
-; CHECK: call i16 @dx.op.binary.i16(i32 40, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 40, i16 %{{.*}}, i16 %{{.*}}) #[[#ATTR:]]
   %0 = call i16 @llvm.umin.i16(i16 %a, i16 %b)
   ret i16 %0
 }
@@ -13,7 +13,7 @@ entry:
 ; CHECK-LABEL:test_umin_i32
 define noundef i32 @test_umin_i32(i32 noundef %a, i32 noundef %b) {
 entry:
-; CHECK: call i32 @dx.op.binary.i32(i32 40, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 40, i32 %{{.*}}, i32 %{{.*}}) #[[#ATTR]]
   %0 = call i32 @llvm.umin.i32(i32 %a, i32 %b)
   ret i32 %0
 }
@@ -21,11 +21,13 @@ entry:
 ; CHECK-LABEL:test_umin_i64
 define noundef i64 @test_umin_i64(i64 noundef %a, i64 noundef %b) {
 entry:
-; CHECK: call i64 @dx.op.binary.i64(i32 40, i64 %{{.*}}, i64 %{{.*}})
+; CHECK: call i64 @dx.op.binary.i64(i32 40, i64 %{{.*}}, i64 %{{.*}}) #[[#ATTR]]
   %0 = call i64 @llvm.umin.i64(i64 %a, i64 %b)
   ret i64 %0
 }
 
+; CHECK: attributes #[[#ATTR]] = {{{.*}} memory(none) {{.*}}}
+
 declare i16 @llvm.umin.i16(i16, i16)
 declare i32 @llvm.umin.i32(i32, i32)
 declare i64 @llvm.umin.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/wave_is_first_lane.ll b/llvm/test/CodeGen/DirectX/wave_is_first_lane.ll
index 2265dd8f7348c3..6740d4075eee54 100644
--- a/llvm/test/CodeGen/DirectX/wave_is_first_lane.ll
+++ b/llvm/test/CodeGen/DirectX/wave_is_first_lane.ll
@@ -7,6 +7,8 @@ entry:
   ret void
 }
 
+; CHECK-NOT: attributes {{.*}} memory(none)
+
 declare i1 @llvm.dx.wave.is.first.lane() #1
 
 attributes #0 = { convergent norecurse "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }

>From fd5dbce0fdb6f57e1349ad122eebd55ce06f2c6d Mon Sep 17 00:00:00 2001
From: Finn Plummer <canadienfinn at gmail.com>
Date: Wed, 20 Nov 2024 21:48:30 +0000
Subject: [PATCH 04/12] define DXILProperty for queryable attributes

- define these properties in DXIL.td and DXILConstants.h
- emit DXIL definitions as enumerations
- emit some helper functions to query OpCodeProp for each property class
---
 llvm/lib/Target/DirectX/DXIL.td           | 15 ++++++
 llvm/lib/Target/DirectX/DXILConstants.h   |  5 ++
 llvm/lib/Target/DirectX/DXILOpBuilder.cpp |  1 +
 llvm/utils/TableGen/DXILEmitter.cpp       | 59 ++++++++++++++++++++++-
 4 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index c08ff14a95afa0..e191701de19df4 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -279,6 +279,18 @@ def ReadOnly : DXILAttribute;
 def NoDuplicate : DXILAttribute;
 def NoReturn : DXILAttribute;
 
+// A property is simply used to mark that a DXIL op belongs to a sub-group of
+// DXIL ops, and it is used to query if a particular op holds this property.
+// This is used for static analysis of DXIL ops.
+class DXILProperty;
+
+def IsBarrier : DXILProperty;
+def IsDerivative : DXILProperty;
+def IsGradient : DXILProperty;
+def IsFeedback : DXILProperty;
+def IsWave : DXILProperty;
+def RequiresUniformInputs : DXILProperty;
+
 class Overloads<Version ver, list<DXILOpParamType> ols> {
   Version dxil_version = ver;
   list<DXILOpParamType> overload_types = ols;
@@ -376,6 +388,9 @@ class DXILOp<int opcode, DXILOpClass opclass> {
 
   // Versioned attributes of operation
   list<Attributes> attributes = [];
+
+  // List of properties. Default to no properties.
+  list<DXILProperty> properties = [];
 }
 
 // Concrete definitions of DXIL Operations
diff --git a/llvm/lib/Target/DirectX/DXILConstants.h b/llvm/lib/Target/DirectX/DXILConstants.h
index d93892f27a0d29..229401d6b271aa 100644
--- a/llvm/lib/Target/DirectX/DXILConstants.h
+++ b/llvm/lib/Target/DirectX/DXILConstants.h
@@ -35,6 +35,11 @@ enum class Attribute : unsigned {
 #include "DXILOperation.inc"
 };
 
+enum class Property : unsigned {
+#define DXIL_PROPERTY(Name) Name,
+#include "DXILOperation.inc"
+};
+
 } // namespace dxil
 } // namespace llvm
 
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index f1b72cea75c5e7..cae3f2ea43bf8e 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -159,6 +159,7 @@ struct OpCodeProperty {
   llvm::SmallVector<OpOverload> Overloads;
   llvm::SmallVector<OpStage> Stages;
   llvm::SmallVector<OpAttribute> Attributes;
+  llvm::SmallVector<dxil::Property> Properties;
   int OverloadParamIndex; // parameter index which control the overload.
                           // When < 0, should be only 1 overload type.
 };
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 44be25e481824f..f8a2c8712fd653 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -56,6 +56,7 @@ struct DXILOperationDesc {
   SmallVector<const Record *> OverloadRecs;
   SmallVector<const Record *> StageRecs;
   SmallVector<const Record *> AttrRecs;
+  SmallVector<const Record *> PropRecs;
   SmallVector<DXILIntrinsicSelect> IntrinsicSelects;
   SmallVector<StringRef, 4>
       ShaderStages; // shader stages to which this applies, empty for all.
@@ -177,6 +178,13 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
     AttrRecs.push_back(CR);
   }
 
+  Recs = R->getValueAsListOfDefs("properties");
+
+  // Get property records
+  for (const Record *CR : Recs) {
+    PropRecs.push_back(CR);
+  }
+
   // Get the operation class
   OpClass = R->getValueAsDef("OpClass")->getName();
 
@@ -348,6 +356,28 @@ static std::string getAttributeListString(ArrayRef<const Record *> Recs) {
   return ListString;
 }
 
+/// Return a string representation of valid property information denoted
+/// by input records
+///
+/// \param Recs A vector of records of TableGen Property records
+/// \return std::string string representation of properties list string
+///          {Prop1, Prop2, ...}
+static std::string getPropertyListString(ArrayRef<const Record *> Recs) {
+  std::string ListString = "";
+  std::string Prefix = "";
+  ListString.append("{");
+
+  std::string CommaPrefix = "";
+  for (const auto *Rec : Recs) {
+    ListString.append(CommaPrefix)
+        .append("dxil::Property::")
+        .append(Rec->getName());
+    CommaPrefix = ", ";
+  }
+  ListString.append("}");
+  return ListString;
+}
+
 /// Emit a mapping of DXIL opcode to opname
 static void emitDXILOpCodes(ArrayRef<DXILOperationDesc> Ops, raw_ostream &OS) {
   OS << "#ifdef DXIL_OPCODE\n";
@@ -386,6 +416,30 @@ static void emitDXILAttributes(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "#endif\n\n";
 }
 
+/// Emit a list of DXIL op properties and their query functions
+static void emitDXILProperties(const RecordKeeper &Records, raw_ostream &OS) {
+  // Generate their definitions
+  OS << "#ifdef DXIL_PROPERTY\n";
+  for (const Record *Prop: Records.getAllDerivedDefinitions("DXILProperty"))
+    OS << "DXIL_PROPERTY(" << Prop->getName() << ")\n";
+  OS << "#undef DXIL_PROPERTY\n";
+  OS << "#endif\n\n";
+}
+
+static void emitDXILPropertyHelpers(const RecordKeeper &Records, raw_ostream &OS) {
+  // Generate their helper functions
+  for (const Record *Prop: Records.getAllDerivedDefinitions("DXILProperty")) {
+    OS << "[[maybe_unused]]\n";
+    OS << "static bool has" << Prop->getName() << "(dxil::OpCode Op) {\n";
+    OS << "  auto *OpCodeProp = getOpCodeProperty(Op);\n";
+    OS << "  for (auto Prop : OpCodeProp->Properties)\n";
+    OS << "    if (Prop == dxil::Property::" << Prop->getName() << ")\n";
+    OS << "      return true;\n";
+    OS << "  return false;\n";
+    OS << "}\n\n";
+  }
+}
+
 /// Emit a list of DXIL op function types
 static void emitDXILOpFunctionTypes(ArrayRef<DXILOperationDesc> Ops,
                                     raw_ostream &OS) {
@@ -482,7 +536,8 @@ static void emitDXILOperationTable(ArrayRef<DXILOperationDesc> Ops,
        << OpClassStrings.get(Op.OpClass.data()) << ", "
        << getOverloadMaskString(Op.OverloadRecs) << ", "
        << getStageMaskString(Op.StageRecs) << ", "
-       << getAttributeListString(Op.AttrRecs) << ", " << Op.OverloadParamIndex
+       << getAttributeListString(Op.AttrRecs) << ", "
+       << getPropertyListString(Op.PropRecs) << ", " << Op.OverloadParamIndex
        << " }";
     Prefix = ",\n";
   }
@@ -588,12 +643,14 @@ static void emitDxilOperation(const RecordKeeper &Records, raw_ostream &OS) {
   emitDXILOpClasses(Records, OS);
   emitDXILOpParamTypes(Records, OS);
   emitDXILAttributes(Records, OS);
+  emitDXILProperties(Records, OS);
   emitDXILOpFunctionTypes(DXILOps, OS);
   emitDXILIntrinsicArgSelectTypes(Records, OS);
   emitDXILIntrinsicMap(DXILOps, OS);
   OS << "#ifdef DXIL_OP_OPERATION_TABLE\n\n";
   emitDXILOperationTableDataStructs(Records, OS);
   emitDXILOperationTable(DXILOps, OS);
+  emitDXILPropertyHelpers(Records, OS);
   OS << "#undef DXIL_OP_OPERATION_TABLE\n";
   OS << "#endif\n\n";
 }

>From 66c0750512339d3eadda3262685ee4846827ac60 Mon Sep 17 00:00:00 2001
From: Finn Plummer <canadienfinn at gmail.com>
Date: Wed, 20 Nov 2024 22:43:40 +0000
Subject: [PATCH 05/12] clang format

---
 llvm/utils/TableGen/DXILEmitter.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index f8a2c8712fd653..7970f02c315f94 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -420,15 +420,16 @@ static void emitDXILAttributes(const RecordKeeper &Records, raw_ostream &OS) {
 static void emitDXILProperties(const RecordKeeper &Records, raw_ostream &OS) {
   // Generate their definitions
   OS << "#ifdef DXIL_PROPERTY\n";
-  for (const Record *Prop: Records.getAllDerivedDefinitions("DXILProperty"))
+  for (const Record *Prop : Records.getAllDerivedDefinitions("DXILProperty"))
     OS << "DXIL_PROPERTY(" << Prop->getName() << ")\n";
   OS << "#undef DXIL_PROPERTY\n";
   OS << "#endif\n\n";
 }
 
-static void emitDXILPropertyHelpers(const RecordKeeper &Records, raw_ostream &OS) {
+static void emitDXILPropertyHelpers(const RecordKeeper &Records,
+                                    raw_ostream &OS) {
   // Generate their helper functions
-  for (const Record *Prop: Records.getAllDerivedDefinitions("DXILProperty")) {
+  for (const Record *Prop : Records.getAllDerivedDefinitions("DXILProperty")) {
     OS << "[[maybe_unused]]\n";
     OS << "static bool has" << Prop->getName() << "(dxil::OpCode Op) {\n";
     OS << "  auto *OpCodeProp = getOpCodeProperty(Op);\n";

>From 4205861e2927fcaebe73006c97e3d4908e55a242 Mon Sep 17 00:00:00 2001
From: Finn Plummer <canadienfinn at gmail.com>
Date: Thu, 21 Nov 2024 22:31:38 +0000
Subject: [PATCH 06/12] change to emitting a general query helper

- instead of generating a query helper for each property, just use a
general one for all property enums
---
 llvm/utils/TableGen/DXILEmitter.cpp | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 7970f02c315f94..aaf418aa3a070e 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -426,19 +426,16 @@ static void emitDXILProperties(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "#endif\n\n";
 }
 
-static void emitDXILPropertyHelpers(const RecordKeeper &Records,
-                                    raw_ostream &OS) {
-  // Generate their helper functions
-  for (const Record *Prop : Records.getAllDerivedDefinitions("DXILProperty")) {
-    OS << "[[maybe_unused]]\n";
-    OS << "static bool has" << Prop->getName() << "(dxil::OpCode Op) {\n";
-    OS << "  auto *OpCodeProp = getOpCodeProperty(Op);\n";
-    OS << "  for (auto Prop : OpCodeProp->Properties)\n";
-    OS << "    if (Prop == dxil::Property::" << Prop->getName() << ")\n";
-    OS << "      return true;\n";
-    OS << "  return false;\n";
-    OS << "}\n\n";
-  }
+static void emitDXILPropertyHelper(raw_ostream &OS) {
+  // Generate helper function to query all the functions
+  OS << "[[maybe_unused]]\n";
+  OS << "static bool hasProperty(dxil::OpCode Op, dxil::Property Prop) {\n";
+  OS << "  auto *OpCodeProp = getOpCodeProperty(Op);\n";
+  OS << "  for (auto CurProp : OpCodeProp->Properties)\n";
+  OS << "    if (CurProp == Prop)\n";
+  OS << "      return true;\n";
+  OS << "  return false;\n";
+  OS << "}\n\n";
 }
 
 /// Emit a list of DXIL op function types
@@ -651,7 +648,7 @@ static void emitDxilOperation(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "#ifdef DXIL_OP_OPERATION_TABLE\n\n";
   emitDXILOperationTableDataStructs(Records, OS);
   emitDXILOperationTable(DXILOps, OS);
-  emitDXILPropertyHelpers(Records, OS);
+  emitDXILPropertyHelper(OS);
   OS << "#undef DXIL_OP_OPERATION_TABLE\n";
   OS << "#endif\n\n";
 }

>From eb102907c8ff0d205292ceb1604aa135205f3051 Mon Sep 17 00:00:00 2001
From: Finn Plummer <canadienfinn at gmail.com>
Date: Thu, 21 Nov 2024 22:38:07 +0000
Subject: [PATCH 07/12] add newly added op on rebase

---
 llvm/test/CodeGen/DirectX/WaveActiveAnyTrue.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/DirectX/WaveActiveAnyTrue.ll b/llvm/test/CodeGen/DirectX/WaveActiveAnyTrue.ll
index 5adf050a76c98f..38f1ce0f9e9c97 100644
--- a/llvm/test/CodeGen/DirectX/WaveActiveAnyTrue.ll
+++ b/llvm/test/CodeGen/DirectX/WaveActiveAnyTrue.ll
@@ -2,7 +2,7 @@
 
 define noundef i1 @wave_any_simple(i1 noundef %p1) {
 entry:
-; CHECK: call i1 @dx.op.waveAnyTrue(i32 113, i1 %p1)
+; CHECK: call i1 @dx.op.waveAnyTrue(i32 113, i1 %p1){{$}}
   %ret = call i1 @llvm.dx.wave.any(i1 %p1)
   ret i1 %ret
 }

>From 4a1561da69c76a4f44939d6478bd47528a657498 Mon Sep 17 00:00:00 2001
From: Finn Plummer <canadienfinn at gmail.com>
Date: Fri, 29 Nov 2024 22:23:37 +0000
Subject: [PATCH 08/12] remove dependency on OpCodeProperty for helper function

- before this change, using hasProperty required defining OpCodeProperty
and all of the structs it uses, which made hasProperty unusable anywhere
outside of DXILOpBuilder.
---
 llvm/utils/TableGen/DXILEmitter.cpp | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index aaf418aa3a070e..9184cb54e691dc 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -426,12 +426,22 @@ static void emitDXILProperties(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "#endif\n\n";
 }
 
-static void emitDXILPropertyHelper(raw_ostream &OS) {
+static void emitDXILPropertyHelper(ArrayRef<DXILOperationDesc> Ops,
+                                   raw_ostream &OS) {
   // Generate helper function to query all the functions
-  OS << "[[maybe_unused]]\n";
+  OS << "static llvm::SmallVector<dxil::Property> getProperties(dxil::OpCode "
+        "Op) {\n";
+  OS << "  switch (Op) {\n";
+  for (const auto &Op : Ops) {
+    OS << "  case dxil::OpCode::" << Op.OpName << ": return "
+       << getPropertyListString(Op.PropRecs) << ";\n";
+  }
+  OS << "  }\n";
+  OS << "  return {};\n";
+  OS << "}\n\n";
   OS << "static bool hasProperty(dxil::OpCode Op, dxil::Property Prop) {\n";
-  OS << "  auto *OpCodeProp = getOpCodeProperty(Op);\n";
-  OS << "  for (auto CurProp : OpCodeProp->Properties)\n";
+  OS << "  auto Properties = getProperties(Op);\n";
+  OS << "  for (auto CurProp : Properties)\n";
   OS << "    if (CurProp == Prop)\n";
   OS << "      return true;\n";
   OS << "  return false;\n";
@@ -648,9 +658,12 @@ static void emitDxilOperation(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "#ifdef DXIL_OP_OPERATION_TABLE\n\n";
   emitDXILOperationTableDataStructs(Records, OS);
   emitDXILOperationTable(DXILOps, OS);
-  emitDXILPropertyHelper(OS);
   OS << "#undef DXIL_OP_OPERATION_TABLE\n";
   OS << "#endif\n\n";
+  OS << "#ifdef DXIL_OP_PROPERTY_HELPER\n";
+  emitDXILPropertyHelper(DXILOps, OS);
+  OS << "#undef DXIL_OP_PROPERTY_HELPER\n";
+  OS << "#endif\n\n";
 }
 
 static TableGen::Emitter::Opt X("gen-dxil-operation", emitDxilOperation,

>From 5c4c605250f28dc284d4f1265d80490590e07754 Mon Sep 17 00:00:00 2001
From: Finn Plummer <canadienfinn at gmail.com>
Date: Fri, 29 Nov 2024 22:26:20 +0000
Subject: [PATCH 09/12] review comment:

- remove `IsDerivative` property as it was not used in `hctdb.py`
---
 llvm/lib/Target/DirectX/DXIL.td | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index e191701de19df4..fc60a80dcf114b 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -285,7 +285,6 @@ def NoReturn : DXILAttribute;
 class DXILProperty;
 
 def IsBarrier : DXILProperty;
-def IsDerivative : DXILProperty;
 def IsGradient : DXILProperty;
 def IsFeedback : DXILProperty;
 def IsWave : DXILProperty;

>From c08662c1f91b7aa203ce37b9b80f21d22769af55 Mon Sep 17 00:00:00 2001
From: Finn Plummer <canadienfinn at gmail.com>
Date: Mon, 16 Dec 2024 21:35:46 +0000
Subject: [PATCH 10/12] sort orders by opcode

---
 llvm/lib/Target/DirectX/DXIL.td | 114 ++++++++++++++++----------------
 1 file changed, 57 insertions(+), 57 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index fc60a80dcf114b..f878b7c271cd37 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -392,7 +392,7 @@ class DXILOp<int opcode, DXILOpClass opclass> {
   list<DXILProperty> properties = [];
 }
 
-// Concrete definitions of DXIL Operations
+// Concrete definitions of DXIL Operations - These are ordered by their OpCode value
 
 def Abs :  DXILOp<6, unary> {
   let Doc = "Returns the absolute value of the input.";
@@ -841,6 +841,20 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> {
   let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
 }
 
+def Barrier : DXILOp<80, barrier> {
+  let Doc = "inserts a memory barrier in the shader";
+  let intrinsics = [
+    IntrinSelect<
+        int_dx_group_memory_barrier_with_group_sync,
+        [ IntrinArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync> ]>,
+  ];
+
+  let arguments = [Int32Ty];
+  let result = VoidTy;
+  let stages = [Stages<DXIL1_0, [compute, library]>];
+  let attributes = [Attributes<DXIL1_0, []>];
+}
+
 def Discard : DXILOp<82, discard> {
   let Doc = "discard the current pixel";
   let intrinsics = [ IntrinSelect<int_dx_discard> ];
@@ -907,6 +921,48 @@ def SplitDouble :  DXILOp<102, splitDouble> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
+def WaveIsFirstLane :  DXILOp<110, waveIsFirstLane> {
+  let Doc = "returns 1 for the first lane in the wave";
+  let intrinsics = [ IntrinSelect<int_dx_wave_is_first_lane> ];
+  let arguments = [];
+  let result = Int1Ty;
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
+def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
+  let Doc = "returns the index of the current lane in the wave";
+  let intrinsics = [ IntrinSelect<int_dx_wave_getlaneindex> ];
+  let arguments = [];
+  let result = Int32Ty;
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
+}
+
+def WaveActiveAnyTrue : DXILOp<113, waveAnyTrue> {
+  let Doc = "returns true if the expression is true in any of the active lanes in the current wave";
+  let intrinsics = [ IntrinSelect<int_dx_wave_any> ];
+  let arguments = [Int1Ty];
+  let result = Int1Ty;
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
+def WaveReadLaneAt:  DXILOp<117, waveReadLaneAt> {
+  let Doc = "returns the value from the specified lane";
+  let intrinsics = [ IntrinSelect<int_dx_wave_readlane> ];
+  let arguments = [OverloadTy, Int32Ty];
+  let result = OverloadTy;
+  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy, Int1Ty, Int16Ty, Int32Ty, Int64Ty]>];
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
+def WaveAllBitCount : DXILOp<135, waveAllOp> {
+  let Doc = "returns the count of bits set to 1 across the wave";
+  let intrinsics = [ IntrinSelect<int_dx_wave_active_countbits> ];
+  let arguments = [Int1Ty];
+  let result = Int32Ty;
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
 def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
   let Doc = "signed dot product of 4 x i8 vectors packed into i32, with "
             "accumulate to i32";
@@ -942,59 +998,3 @@ def CreateHandleFromBinding : DXILOp<217, createHandleFromBinding> {
   let stages = [Stages<DXIL1_6, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
-
-def WaveActiveAnyTrue : DXILOp<113, waveAnyTrue> {
-  let Doc = "returns true if the expression is true in any of the active lanes in the current wave";
-  let intrinsics = [ IntrinSelect<int_dx_wave_any> ];
-  let arguments = [Int1Ty];
-  let result = Int1Ty;
-  let stages = [Stages<DXIL1_0, [all_stages]>];
-}
-
-def WaveIsFirstLane :  DXILOp<110, waveIsFirstLane> {
-  let Doc = "returns 1 for the first lane in the wave";
-  let intrinsics = [ IntrinSelect<int_dx_wave_is_first_lane> ];
-  let arguments = [];
-  let result = Int1Ty;
-  let stages = [Stages<DXIL1_0, [all_stages]>];
-}
-
-def WaveReadLaneAt:  DXILOp<117, waveReadLaneAt> {
-  let Doc = "returns the value from the specified lane";
-  let intrinsics = [ IntrinSelect<int_dx_wave_readlane> ];
-  let arguments = [OverloadTy, Int32Ty];
-  let result = OverloadTy;
-  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy, Int1Ty, Int16Ty, Int32Ty, Int64Ty]>];
-  let stages = [Stages<DXIL1_0, [all_stages]>];
-}
-
-def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
-  let Doc = "returns the index of the current lane in the wave";
-  let intrinsics = [ IntrinSelect<int_dx_wave_getlaneindex> ];
-  let arguments = [];
-  let result = Int32Ty;
-  let stages = [Stages<DXIL1_0, [all_stages]>];
-  let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
-}
-
-def WaveAllBitCount : DXILOp<135, waveAllOp> {
-  let Doc = "returns the count of bits set to 1 across the wave";
-  let intrinsics = [ IntrinSelect<int_dx_wave_active_countbits> ];
-  let arguments = [Int1Ty];
-  let result = Int32Ty;
-  let stages = [Stages<DXIL1_0, [all_stages]>];
-}
-
-def Barrier : DXILOp<80, barrier> {
-  let Doc = "inserts a memory barrier in the shader";
-  let intrinsics = [
-    IntrinSelect<
-        int_dx_group_memory_barrier_with_group_sync,
-        [ IntrinArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync> ]>,
-  ];
-
-  let arguments = [Int32Ty];
-  let result = VoidTy;
-  let stages = [Stages<DXIL1_0, [compute, library]>];
-  let attributes = [Attributes<DXIL1_0, []>];
-}

>From 2e4d5c28073476c71f5ea399679ffc7ab66a0dc2 Mon Sep 17 00:00:00 2001
From: Finn Plummer <canadienfinn at gmail.com>
Date: Mon, 16 Dec 2024 21:39:41 +0000
Subject: [PATCH 11/12] apply clang-format to DXIL.td

---
 llvm/lib/Target/DirectX/DXIL.td | 646 ++++++++++++++++----------------
 1 file changed, 318 insertions(+), 328 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index f878b7c271cd37..852953a6b0df14 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -51,195 +51,194 @@ def SplitDoubleTy : DXILOpParamType;
 
 class DXILOpClass;
 
-defset list<DXILOpClass> OpClasses = {
-  def acceptHitAndEndSearch : DXILOpClass;
-  def allocateNodeOutputRecords : DXILOpClass;
-  def allocateRayQuery : DXILOpClass;
-  def annotateHandle : DXILOpClass;
-  def annotateNodeHandle : DXILOpClass;
-  def annotateNodeRecordHandle : DXILOpClass;
-  def atomicBinOp : DXILOpClass;
-  def atomicCompareExchange : DXILOpClass;
-  def attributeAtVertex : DXILOpClass;
-  def barrier : DXILOpClass;
-  def barrierByMemoryHandle : DXILOpClass;
-  def barrierByMemoryType : DXILOpClass;
-  def barrierByNodeRecordHandle : DXILOpClass;
-  def binary : DXILOpClass;
-  def binaryWithCarryOrBorrow : DXILOpClass;
-  def binaryWithTwoOuts : DXILOpClass;
-  def bitcastF16toI16 : DXILOpClass;
-  def bitcastF32toI32 : DXILOpClass;
-  def bitcastF64toI64 : DXILOpClass;
-  def bitcastI16toF16 : DXILOpClass;
-  def bitcastI32toF32 : DXILOpClass;
-  def bitcastI64toF64 : DXILOpClass;
-  def bufferLoad : DXILOpClass;
-  def bufferStore : DXILOpClass;
-  def bufferUpdateCounter : DXILOpClass;
-  def calculateLOD : DXILOpClass;
-  def callShader : DXILOpClass;
-  def cbufferLoad : DXILOpClass;
-  def cbufferLoadLegacy : DXILOpClass;
-  def checkAccessFullyMapped : DXILOpClass;
-  def coverage : DXILOpClass;
-  def createHandle : DXILOpClass;
-  def createHandleForLib : DXILOpClass;
-  def createHandleFromBinding : DXILOpClass;
-  def createHandleFromHeap : DXILOpClass;
-  def createNodeInputRecordHandle : DXILOpClass;
-  def createNodeOutputHandle : DXILOpClass;
-  def cutStream : DXILOpClass;
-  def cycleCounterLegacy : DXILOpClass;
-  def discard : DXILOpClass;
-  def dispatchMesh : DXILOpClass;
-  def dispatchRaysDimensions : DXILOpClass;
-  def dispatchRaysIndex : DXILOpClass;
-  def domainLocation : DXILOpClass;
-  def dot2 : DXILOpClass;
-  def dot2AddHalf : DXILOpClass;
-  def dot3 : DXILOpClass;
-  def dot4 : DXILOpClass;
-  def dot4AddPacked : DXILOpClass;
-  def emitIndices : DXILOpClass;
-  def emitStream : DXILOpClass;
-  def emitThenCutStream : DXILOpClass;
-  def evalCentroid : DXILOpClass;
-  def evalSampleIndex : DXILOpClass;
-  def evalSnapped : DXILOpClass;
-  def finishedCrossGroupSharing : DXILOpClass;
-  def flattenedThreadIdInGroup : DXILOpClass;
-  def geometryIndex : DXILOpClass;
-  def getDimensions : DXILOpClass;
-  def getInputRecordCount : DXILOpClass;
-  def getMeshPayload : DXILOpClass;
-  def getNodeRecordPtr : DXILOpClass;
-  def getRemainingRecursionLevels : DXILOpClass;
-  def groupId : DXILOpClass;
-  def gsInstanceID : DXILOpClass;
-  def hitKind : DXILOpClass;
-  def ignoreHit : DXILOpClass;
-  def incrementOutputCount : DXILOpClass;
-  def indexNodeHandle : DXILOpClass;
-  def innerCoverage : DXILOpClass;
-  def instanceID : DXILOpClass;
-  def instanceIndex : DXILOpClass;
-  def isHelperLane : DXILOpClass;
-  def isSpecialFloat : DXILOpClass;
-  def legacyDoubleToFloat : DXILOpClass;
-  def legacyDoubleToSInt32 : DXILOpClass;
-  def legacyDoubleToUInt32 : DXILOpClass;
-  def legacyF16ToF32 : DXILOpClass;
-  def legacyF32ToF16 : DXILOpClass;
-  def loadInput : DXILOpClass;
-  def loadOutputControlPoint : DXILOpClass;
-  def loadPatchConstant : DXILOpClass;
-  def makeDouble : DXILOpClass;
-  def minPrecXRegLoad : DXILOpClass;
-  def minPrecXRegStore : DXILOpClass;
-  def nodeOutputIsValid : DXILOpClass;
-  def objectRayDirection : DXILOpClass;
-  def objectRayOrigin : DXILOpClass;
-  def objectToWorld : DXILOpClass;
-  def outputComplete : DXILOpClass;
-  def outputControlPointID : DXILOpClass;
-  def pack4x8 : DXILOpClass;
-  def primitiveID : DXILOpClass;
-  def primitiveIndex : DXILOpClass;
-  def quadOp : DXILOpClass;
-  def quadReadLaneAt : DXILOpClass;
-  def quadVote : DXILOpClass;
-  def quaternary : DXILOpClass;
-  def rawBufferLoad : DXILOpClass;
-  def rawBufferStore : DXILOpClass;
-  def rayFlags : DXILOpClass;
-  def rayQuery_Abort : DXILOpClass;
-  def rayQuery_CommitNonOpaqueTriangleHit : DXILOpClass;
-  def rayQuery_CommitProceduralPrimitiveHit : DXILOpClass;
-  def rayQuery_Proceed : DXILOpClass;
-  def rayQuery_StateMatrix : DXILOpClass;
-  def rayQuery_StateScalar : DXILOpClass;
-  def rayQuery_StateVector : DXILOpClass;
-  def rayQuery_TraceRayInline : DXILOpClass;
-  def rayTCurrent : DXILOpClass;
-  def rayTMin : DXILOpClass;
-  def renderTargetGetSampleCount : DXILOpClass;
-  def renderTargetGetSamplePosition : DXILOpClass;
-  def reportHit : DXILOpClass;
-  def sample : DXILOpClass;
-  def sampleBias : DXILOpClass;
-  def sampleCmp : DXILOpClass;
-  def sampleCmpBias : DXILOpClass;
-  def sampleCmpGrad : DXILOpClass;
-  def sampleCmpLevel : DXILOpClass;
-  def sampleCmpLevelZero : DXILOpClass;
-  def sampleGrad : DXILOpClass;
-  def sampleIndex : DXILOpClass;
-  def sampleLevel : DXILOpClass;
-  def setMeshOutputCounts : DXILOpClass;
-  def splitDouble : DXILOpClass;
-  def startInstanceLocation : DXILOpClass;
-  def startVertexLocation : DXILOpClass;
-  def storeOutput : DXILOpClass;
-  def storePatchConstant : DXILOpClass;
-  def storePrimitiveOutput : DXILOpClass;
-  def storeVertexOutput : DXILOpClass;
-  def tempRegLoad : DXILOpClass;
-  def tempRegStore : DXILOpClass;
-  def tertiary : DXILOpClass;
-  def texture2DMSGetSamplePosition : DXILOpClass;
-  def textureGather : DXILOpClass;
-  def textureGatherCmp : DXILOpClass;
-  def textureGatherRaw : DXILOpClass;
-  def textureLoad : DXILOpClass;
-  def textureStore : DXILOpClass;
-  def textureStoreSample : DXILOpClass;
-  def threadId : DXILOpClass;
-  def threadIdInGroup : DXILOpClass;
-  def traceRay : DXILOpClass;
-  def unary : DXILOpClass;
-  def unaryBits : DXILOpClass;
-  def unpack4x8 : DXILOpClass;
-  def viewID : DXILOpClass;
-  def waveActiveAllEqual : DXILOpClass;
-  def waveActiveBallot : DXILOpClass;
-  def waveActiveBit : DXILOpClass;
-  def waveActiveOp : DXILOpClass;
-  def waveAllOp : DXILOpClass;
-  def waveAllTrue : DXILOpClass;
-  def waveAnyTrue : DXILOpClass;
-  def waveGetLaneCount : DXILOpClass;
-  def waveGetLaneIndex : DXILOpClass;
-  def waveIsFirstLane : DXILOpClass;
-  def waveMatch : DXILOpClass;
-  def waveMatrix_Accumulate : DXILOpClass;
-  def waveMatrix_Annotate : DXILOpClass;
-  def waveMatrix_Depth : DXILOpClass;
-  def waveMatrix_Fill : DXILOpClass;
-  def waveMatrix_LoadGroupShared : DXILOpClass;
-  def waveMatrix_LoadRawBuf : DXILOpClass;
-  def waveMatrix_Multiply : DXILOpClass;
-  def waveMatrix_ScalarOp : DXILOpClass;
-  def waveMatrix_StoreGroupShared : DXILOpClass;
-  def waveMatrix_StoreRawBuf : DXILOpClass;
-  def waveMultiPrefixBitCount : DXILOpClass;
-  def waveMultiPrefixOp : DXILOpClass;
-  def wavePrefixOp : DXILOpClass;
-  def waveReadLaneAt : DXILOpClass;
-  def waveReadLaneFirst : DXILOpClass;
-  def worldRayDirection : DXILOpClass;
-  def worldRayOrigin : DXILOpClass;
-  def worldToObject : DXILOpClass;
-  def writeSamplerFeedback : DXILOpClass;
-  def writeSamplerFeedbackBias : DXILOpClass;
-  def writeSamplerFeedbackGrad : DXILOpClass;
-  def writeSamplerFeedbackLevel: DXILOpClass;
-
-  // This is a sentinel definition. Hence placed at the end here and
-  // not as part of the above alphabetically sorted valid definitions.
-  // It is never used to construct the name of DXIL Op call name.
-  // Additionally it is capitalized unlike all the others.
-  def UnknownOpClass : DXILOpClass;
+defset list<DXILOpClass> OpClasses = {def acceptHitAndEndSearch: DXILOpClass;
+def allocateNodeOutputRecords : DXILOpClass;
+def allocateRayQuery : DXILOpClass;
+def annotateHandle : DXILOpClass;
+def annotateNodeHandle : DXILOpClass;
+def annotateNodeRecordHandle : DXILOpClass;
+def atomicBinOp : DXILOpClass;
+def atomicCompareExchange : DXILOpClass;
+def attributeAtVertex : DXILOpClass;
+def barrier : DXILOpClass;
+def barrierByMemoryHandle : DXILOpClass;
+def barrierByMemoryType : DXILOpClass;
+def barrierByNodeRecordHandle : DXILOpClass;
+def binary : DXILOpClass;
+def binaryWithCarryOrBorrow : DXILOpClass;
+def binaryWithTwoOuts : DXILOpClass;
+def bitcastF16toI16 : DXILOpClass;
+def bitcastF32toI32 : DXILOpClass;
+def bitcastF64toI64 : DXILOpClass;
+def bitcastI16toF16 : DXILOpClass;
+def bitcastI32toF32 : DXILOpClass;
+def bitcastI64toF64 : DXILOpClass;
+def bufferLoad : DXILOpClass;
+def bufferStore : DXILOpClass;
+def bufferUpdateCounter : DXILOpClass;
+def calculateLOD : DXILOpClass;
+def callShader : DXILOpClass;
+def cbufferLoad : DXILOpClass;
+def cbufferLoadLegacy : DXILOpClass;
+def checkAccessFullyMapped : DXILOpClass;
+def coverage : DXILOpClass;
+def createHandle : DXILOpClass;
+def createHandleForLib : DXILOpClass;
+def createHandleFromBinding : DXILOpClass;
+def createHandleFromHeap : DXILOpClass;
+def createNodeInputRecordHandle : DXILOpClass;
+def createNodeOutputHandle : DXILOpClass;
+def cutStream : DXILOpClass;
+def cycleCounterLegacy : DXILOpClass;
+def discard : DXILOpClass;
+def dispatchMesh : DXILOpClass;
+def dispatchRaysDimensions : DXILOpClass;
+def dispatchRaysIndex : DXILOpClass;
+def domainLocation : DXILOpClass;
+def dot2 : DXILOpClass;
+def dot2AddHalf : DXILOpClass;
+def dot3 : DXILOpClass;
+def dot4 : DXILOpClass;
+def dot4AddPacked : DXILOpClass;
+def emitIndices : DXILOpClass;
+def emitStream : DXILOpClass;
+def emitThenCutStream : DXILOpClass;
+def evalCentroid : DXILOpClass;
+def evalSampleIndex : DXILOpClass;
+def evalSnapped : DXILOpClass;
+def finishedCrossGroupSharing : DXILOpClass;
+def flattenedThreadIdInGroup : DXILOpClass;
+def geometryIndex : DXILOpClass;
+def getDimensions : DXILOpClass;
+def getInputRecordCount : DXILOpClass;
+def getMeshPayload : DXILOpClass;
+def getNodeRecordPtr : DXILOpClass;
+def getRemainingRecursionLevels : DXILOpClass;
+def groupId : DXILOpClass;
+def gsInstanceID : DXILOpClass;
+def hitKind : DXILOpClass;
+def ignoreHit : DXILOpClass;
+def incrementOutputCount : DXILOpClass;
+def indexNodeHandle : DXILOpClass;
+def innerCoverage : DXILOpClass;
+def instanceID : DXILOpClass;
+def instanceIndex : DXILOpClass;
+def isHelperLane : DXILOpClass;
+def isSpecialFloat : DXILOpClass;
+def legacyDoubleToFloat : DXILOpClass;
+def legacyDoubleToSInt32 : DXILOpClass;
+def legacyDoubleToUInt32 : DXILOpClass;
+def legacyF16ToF32 : DXILOpClass;
+def legacyF32ToF16 : DXILOpClass;
+def loadInput : DXILOpClass;
+def loadOutputControlPoint : DXILOpClass;
+def loadPatchConstant : DXILOpClass;
+def makeDouble : DXILOpClass;
+def minPrecXRegLoad : DXILOpClass;
+def minPrecXRegStore : DXILOpClass;
+def nodeOutputIsValid : DXILOpClass;
+def objectRayDirection : DXILOpClass;
+def objectRayOrigin : DXILOpClass;
+def objectToWorld : DXILOpClass;
+def outputComplete : DXILOpClass;
+def outputControlPointID : DXILOpClass;
+def pack4x8 : DXILOpClass;
+def primitiveID : DXILOpClass;
+def primitiveIndex : DXILOpClass;
+def quadOp : DXILOpClass;
+def quadReadLaneAt : DXILOpClass;
+def quadVote : DXILOpClass;
+def quaternary : DXILOpClass;
+def rawBufferLoad : DXILOpClass;
+def rawBufferStore : DXILOpClass;
+def rayFlags : DXILOpClass;
+def rayQuery_Abort : DXILOpClass;
+def rayQuery_CommitNonOpaqueTriangleHit : DXILOpClass;
+def rayQuery_CommitProceduralPrimitiveHit : DXILOpClass;
+def rayQuery_Proceed : DXILOpClass;
+def rayQuery_StateMatrix : DXILOpClass;
+def rayQuery_StateScalar : DXILOpClass;
+def rayQuery_StateVector : DXILOpClass;
+def rayQuery_TraceRayInline : DXILOpClass;
+def rayTCurrent : DXILOpClass;
+def rayTMin : DXILOpClass;
+def renderTargetGetSampleCount : DXILOpClass;
+def renderTargetGetSamplePosition : DXILOpClass;
+def reportHit : DXILOpClass;
+def sample : DXILOpClass;
+def sampleBias : DXILOpClass;
+def sampleCmp : DXILOpClass;
+def sampleCmpBias : DXILOpClass;
+def sampleCmpGrad : DXILOpClass;
+def sampleCmpLevel : DXILOpClass;
+def sampleCmpLevelZero : DXILOpClass;
+def sampleGrad : DXILOpClass;
+def sampleIndex : DXILOpClass;
+def sampleLevel : DXILOpClass;
+def setMeshOutputCounts : DXILOpClass;
+def splitDouble : DXILOpClass;
+def startInstanceLocation : DXILOpClass;
+def startVertexLocation : DXILOpClass;
+def storeOutput : DXILOpClass;
+def storePatchConstant : DXILOpClass;
+def storePrimitiveOutput : DXILOpClass;
+def storeVertexOutput : DXILOpClass;
+def tempRegLoad : DXILOpClass;
+def tempRegStore : DXILOpClass;
+def tertiary : DXILOpClass;
+def texture2DMSGetSamplePosition : DXILOpClass;
+def textureGather : DXILOpClass;
+def textureGatherCmp : DXILOpClass;
+def textureGatherRaw : DXILOpClass;
+def textureLoad : DXILOpClass;
+def textureStore : DXILOpClass;
+def textureStoreSample : DXILOpClass;
+def threadId : DXILOpClass;
+def threadIdInGroup : DXILOpClass;
+def traceRay : DXILOpClass;
+def unary : DXILOpClass;
+def unaryBits : DXILOpClass;
+def unpack4x8 : DXILOpClass;
+def viewID : DXILOpClass;
+def waveActiveAllEqual : DXILOpClass;
+def waveActiveBallot : DXILOpClass;
+def waveActiveBit : DXILOpClass;
+def waveActiveOp : DXILOpClass;
+def waveAllOp : DXILOpClass;
+def waveAllTrue : DXILOpClass;
+def waveAnyTrue : DXILOpClass;
+def waveGetLaneCount : DXILOpClass;
+def waveGetLaneIndex : DXILOpClass;
+def waveIsFirstLane : DXILOpClass;
+def waveMatch : DXILOpClass;
+def waveMatrix_Accumulate : DXILOpClass;
+def waveMatrix_Annotate : DXILOpClass;
+def waveMatrix_Depth : DXILOpClass;
+def waveMatrix_Fill : DXILOpClass;
+def waveMatrix_LoadGroupShared : DXILOpClass;
+def waveMatrix_LoadRawBuf : DXILOpClass;
+def waveMatrix_Multiply : DXILOpClass;
+def waveMatrix_ScalarOp : DXILOpClass;
+def waveMatrix_StoreGroupShared : DXILOpClass;
+def waveMatrix_StoreRawBuf : DXILOpClass;
+def waveMultiPrefixBitCount : DXILOpClass;
+def waveMultiPrefixOp : DXILOpClass;
+def wavePrefixOp : DXILOpClass;
+def waveReadLaneAt : DXILOpClass;
+def waveReadLaneFirst : DXILOpClass;
+def worldRayDirection : DXILOpClass;
+def worldRayOrigin : DXILOpClass;
+def worldToObject : DXILOpClass;
+def writeSamplerFeedback : DXILOpClass;
+def writeSamplerFeedbackBias : DXILOpClass;
+def writeSamplerFeedbackGrad : DXILOpClass;
+def writeSamplerFeedbackLevel : DXILOpClass;
+
+// This is a sentinel definition. Hence placed at the end here and
+// not as part of the above alphabetically sorted valid definitions.
+// It is never used to construct the name of a DXIL Op call.
+// Additionally it is capitalized unlike all the others.
+def UnknownOpClass : DXILOpClass;
 }
 
 class DXILShaderStage;
@@ -305,26 +304,26 @@ class Attributes<Version ver = DXIL1_0, list<DXILAttribute> attrs> {
   list<DXILAttribute> fn_attrs = attrs;
 }
 
-defvar BarrierMode_DeviceMemoryBarrier              = 2;
+defvar BarrierMode_DeviceMemoryBarrier = 2;
 defvar BarrierMode_DeviceMemoryBarrierWithGroupSync = 3;
-defvar BarrierMode_GroupMemoryBarrier               = 8;
-defvar BarrierMode_GroupMemoryBarrierWithGroupSync  = 9;
-defvar BarrierMode_AllMemoryBarrier                 = 10;
-defvar BarrierMode_AllMemoryBarrierWithGroupSync    = 11;
+defvar BarrierMode_GroupMemoryBarrier = 8;
+defvar BarrierMode_GroupMemoryBarrierWithGroupSync = 9;
+defvar BarrierMode_AllMemoryBarrier = 10;
+defvar BarrierMode_AllMemoryBarrierWithGroupSync = 11;
 
 // Intrinsic arg selection
 class IntrinArgSelectType;
 def IntrinArgSelect_Index : IntrinArgSelectType;
-def IntrinArgSelect_I8    : IntrinArgSelectType;
-def IntrinArgSelect_I32   : IntrinArgSelectType;
+def IntrinArgSelect_I8 : IntrinArgSelectType;
+def IntrinArgSelect_I32 : IntrinArgSelectType;
 
 class IntrinArgSelect<IntrinArgSelectType type_, int value_> {
   IntrinArgSelectType type = type_;
   int value = value_;
-}
-class IntrinArgIndex<int index> : IntrinArgSelect<IntrinArgSelect_Index, index>;
-class IntrinArgI8   <int value> : IntrinArgSelect<IntrinArgSelect_I8,    value>;
-class IntrinArgI32  <int value> : IntrinArgSelect<IntrinArgSelect_I32,   value>;
+} class IntrinArgIndex<int index>
+    : IntrinArgSelect<IntrinArgSelect_Index, index>;
+class IntrinArgI8<int value> : IntrinArgSelect<IntrinArgSelect_I8, value>;
+class IntrinArgI32<int value> : IntrinArgSelect<IntrinArgSelect_I32, value>;
 
 // Select which intrinsic to lower from for a DXILOp.
 // If the intrinsic is the only argument given to IntrinSelect, then the
@@ -354,7 +353,8 @@ class IntrinArgI32  <int value> : IntrinArgSelect<IntrinArgSelect_I32,   value>;
 //     >,
 //   ]
 //
-class IntrinSelect<Intrinsic intrinsic_, list<IntrinArgSelect> arg_selects_=[]> {
+class IntrinSelect<Intrinsic intrinsic_,
+                   list<IntrinArgSelect> arg_selects_ = []> {
   Intrinsic intrinsic = intrinsic_;
   list<IntrinArgSelect> arg_selects = arg_selects_;
 }
@@ -392,11 +392,12 @@ class DXILOp<int opcode, DXILOpClass opclass> {
   list<DXILProperty> properties = [];
 }
 
-// Concrete definitions of DXIL Operations - There are ordered by their OpCode value
+// Concrete definitions of DXIL Operations - These are ordered by their OpCode
+// value
 
-def Abs :  DXILOp<6, unary> {
+def Abs : DXILOp<6, unary> {
   let Doc = "Returns the absolute value of the input.";
-  let intrinsics = [ IntrinSelect<int_fabs> ];
+  let intrinsics = [IntrinSelect<int_fabs>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy]>];
@@ -404,9 +405,10 @@ def Abs :  DXILOp<6, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Saturate :  DXILOp<7, unary> {
-  let Doc = "Clamps a single or double precision floating point value to [0.0f...1.0f].";
-  let intrinsics = [ IntrinSelect<int_dx_saturate> ];
+def Saturate : DXILOp<7, unary> {
+  let Doc = "Clamps a single or double precision floating point value to "
+            "[0.0f...1.0f].";
+  let intrinsics = [IntrinSelect<int_dx_saturate>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy]>];
@@ -414,9 +416,9 @@ def Saturate :  DXILOp<7, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def IsInf :  DXILOp<9, isSpecialFloat> {
+def IsInf : DXILOp<9, isSpecialFloat> {
   let Doc = "Determines if the specified value is infinite.";
-  let intrinsics = [ IntrinSelect<int_dx_isinf> ];
+  let intrinsics = [IntrinSelect<int_dx_isinf>];
   let arguments = [OverloadTy];
   let result = Int1Ty;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -424,9 +426,9 @@ def IsInf :  DXILOp<9, isSpecialFloat> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Cos :  DXILOp<12, unary> {
+def Cos : DXILOp<12, unary> {
   let Doc = "Returns cosine(theta) for theta in radians.";
-  let intrinsics = [ IntrinSelect<int_cos> ];
+  let intrinsics = [IntrinSelect<int_cos>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -434,9 +436,9 @@ def Cos :  DXILOp<12, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Sin :  DXILOp<13, unary> {
+def Sin : DXILOp<13, unary> {
   let Doc = "Returns sine(theta) for theta in radians.";
-  let intrinsics = [ IntrinSelect<int_sin> ];
+  let intrinsics = [IntrinSelect<int_sin>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -444,9 +446,9 @@ def Sin :  DXILOp<13, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Tan :  DXILOp<14, unary> {
+def Tan : DXILOp<14, unary> {
   let Doc = "Returns tangent(theta) for theta in radians.";
-  let intrinsics = [ IntrinSelect<int_tan> ];
+  let intrinsics = [IntrinSelect<int_tan>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -454,9 +456,9 @@ def Tan :  DXILOp<14, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def ACos :  DXILOp<15, unary> {
+def ACos : DXILOp<15, unary> {
   let Doc = "Returns the arccosine of the specified value.";
-  let intrinsics = [ IntrinSelect<int_acos> ];
+  let intrinsics = [IntrinSelect<int_acos>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -464,9 +466,9 @@ def ACos :  DXILOp<15, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def ASin :  DXILOp<16, unary> {
+def ASin : DXILOp<16, unary> {
   let Doc = "Returns the arcsine of the specified value.";
-  let intrinsics = [ IntrinSelect<int_asin> ];
+  let intrinsics = [IntrinSelect<int_asin>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -474,9 +476,9 @@ def ASin :  DXILOp<16, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def ATan :  DXILOp<17, unary> {
+def ATan : DXILOp<17, unary> {
   let Doc = "Returns the arctangent of the specified value.";
-  let intrinsics = [ IntrinSelect<int_atan> ];
+  let intrinsics = [IntrinSelect<int_atan>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -484,9 +486,9 @@ def ATan :  DXILOp<17, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def HCos :  DXILOp<18, unary> {
+def HCos : DXILOp<18, unary> {
   let Doc = "Returns the hyperbolic cosine of the specified value.";
-  let intrinsics = [ IntrinSelect<int_cosh> ];
+  let intrinsics = [IntrinSelect<int_cosh>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -494,9 +496,9 @@ def HCos :  DXILOp<18, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def HSin :  DXILOp<19, unary> {
+def HSin : DXILOp<19, unary> {
   let Doc = "Returns the hyperbolic sine of the specified value.";
-  let intrinsics = [ IntrinSelect<int_sinh> ];
+  let intrinsics = [IntrinSelect<int_sinh>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -504,9 +506,9 @@ def HSin :  DXILOp<19, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def HTan :  DXILOp<20, unary> {
+def HTan : DXILOp<20, unary> {
   let Doc = "Returns the hyperbolic tan of the specified value.";
-  let intrinsics = [ IntrinSelect<int_tanh> ];
+  let intrinsics = [IntrinSelect<int_tanh>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -514,10 +516,10 @@ def HTan :  DXILOp<20, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Exp2 :  DXILOp<21, unary> {
+def Exp2 : DXILOp<21, unary> {
   let Doc = "Returns the base 2 exponential, or 2**x, of the specified value. "
             "exp2(x) = 2**x.";
-  let intrinsics = [ IntrinSelect<int_exp2> ];
+  let intrinsics = [IntrinSelect<int_exp2>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -525,10 +527,10 @@ def Exp2 :  DXILOp<21, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Frac :  DXILOp<22, unary> {
+def Frac : DXILOp<22, unary> {
   let Doc = "Returns a fraction from 0 to 1 that represents the decimal part "
             "of the input.";
-  let intrinsics = [ IntrinSelect<int_dx_frac> ];
+  let intrinsics = [IntrinSelect<int_dx_frac>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -536,9 +538,9 @@ def Frac :  DXILOp<22, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Log2 :  DXILOp<23, unary> {
+def Log2 : DXILOp<23, unary> {
   let Doc = "Returns the base-2 logarithm of the specified value.";
-  let intrinsics = [ IntrinSelect<int_log2> ];
+  let intrinsics = [IntrinSelect<int_log2>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -546,10 +548,10 @@ def Log2 :  DXILOp<23, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Sqrt :  DXILOp<24, unary> {
+def Sqrt : DXILOp<24, unary> {
   let Doc = "Returns the square root of the specified floating-point value, "
             "per component.";
-  let intrinsics = [ IntrinSelect<int_sqrt> ];
+  let intrinsics = [IntrinSelect<int_sqrt>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -557,10 +559,10 @@ def Sqrt :  DXILOp<24, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def RSqrt :  DXILOp<25, unary> {
+def RSqrt : DXILOp<25, unary> {
   let Doc = "Returns the reciprocal of the square root of the specified value. "
             "rsqrt(x) = 1 / sqrt(x).";
-  let intrinsics = [ IntrinSelect<int_dx_rsqrt> ];
+  let intrinsics = [IntrinSelect<int_dx_rsqrt>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -568,10 +570,10 @@ def RSqrt :  DXILOp<25, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Round :  DXILOp<26, unary> {
+def Round : DXILOp<26, unary> {
   let Doc = "Returns the input rounded to the nearest integer within a "
             "floating-point type.";
-  let intrinsics = [ IntrinSelect<int_roundeven> ];
+  let intrinsics = [IntrinSelect<int_roundeven>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -579,10 +581,10 @@ def Round :  DXILOp<26, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Floor :  DXILOp<27, unary> {
+def Floor : DXILOp<27, unary> {
   let Doc =
       "Returns the largest integer that is less than or equal to the input.";
-  let intrinsics = [ IntrinSelect<int_floor> ];
+  let intrinsics = [IntrinSelect<int_floor>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -590,10 +592,10 @@ def Floor :  DXILOp<27, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Ceil :  DXILOp<28, unary> {
+def Ceil : DXILOp<28, unary> {
   let Doc = "Returns the smallest integer that is greater than or equal to the "
             "input.";
-  let intrinsics = [ IntrinSelect<int_ceil> ];
+  let intrinsics = [IntrinSelect<int_ceil>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -601,9 +603,9 @@ def Ceil :  DXILOp<28, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Trunc :  DXILOp<29, unary> {
+def Trunc : DXILOp<29, unary> {
   let Doc = "Returns the specified value truncated to the integer component.";
-  let intrinsics = [ IntrinSelect<int_trunc> ];
+  let intrinsics = [IntrinSelect<int_trunc>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -611,157 +613,144 @@ def Trunc :  DXILOp<29, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Rbits :  DXILOp<30, unary> {
+def Rbits : DXILOp<30, unary> {
   let Doc = "Returns the specified value with its bits reversed.";
-  let intrinsics = [ IntrinSelect<int_bitreverse> ];
+  let intrinsics = [IntrinSelect<int_bitreverse>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
-  let overloads =
-      [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+  let overloads = [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def CountBits :  DXILOp<31, unaryBits> {
+def CountBits : DXILOp<31, unaryBits> {
   let Doc = "Returns the number of 1 bits in the specified value.";
   let arguments = [OverloadTy];
   let result = Int32Ty;
-  let overloads =
-      [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+  let overloads = [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def FirstbitHi :  DXILOp<33, unaryBits> {
+def FirstbitHi : DXILOp<33, unaryBits> {
   let Doc = "Returns the location of the first set bit starting from "
             "the highest order bit and working downward.";
-  let intrinsics = [ IntrinSelect<int_dx_firstbituhigh> ];
+  let intrinsics = [IntrinSelect<int_dx_firstbituhigh>];
   let arguments = [OverloadTy];
   let result = Int32Ty;
-  let overloads =
-      [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+  let overloads = [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def FirstbitSHi :  DXILOp<34, unaryBits> {
+def FirstbitSHi : DXILOp<34, unaryBits> {
   let Doc = "Returns the location of the first set bit from "
             "the highest order bit based on the sign.";
-  let intrinsics = [ IntrinSelect<int_dx_firstbitshigh> ];
+  let intrinsics = [IntrinSelect<int_dx_firstbitshigh>];
   let arguments = [OverloadTy];
   let result = Int32Ty;
-  let overloads =
-      [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+  let overloads = [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def FMax :  DXILOp<35, binary> {
+def FMax : DXILOp<35, binary> {
   let Doc = "Float maximum. FMax(a,b) = a > b ? a : b";
-  let intrinsics = [ IntrinSelect<int_maxnum> ];
+  let intrinsics = [IntrinSelect<int_maxnum>];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
-  let overloads =
-      [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy]>];
+  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def FMin :  DXILOp<36, binary> {
+def FMin : DXILOp<36, binary> {
   let Doc = "Float minimum. FMin(a,b) = a < b ? a : b";
-  let intrinsics = [ IntrinSelect<int_minnum> ];
+  let intrinsics = [IntrinSelect<int_minnum>];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
-  let overloads =
-      [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy]>];
+  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def SMax :  DXILOp<37, binary> {
+def SMax : DXILOp<37, binary> {
   let Doc = "Signed integer maximum. SMax(a,b) = a > b ? a : b";
-  let intrinsics = [ IntrinSelect<int_smax> ];
+  let intrinsics = [IntrinSelect<int_smax>];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
-  let overloads =
-      [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+  let overloads = [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def SMin :  DXILOp<38, binary> {
+def SMin : DXILOp<38, binary> {
   let Doc = "Signed integer minimum. SMin(a,b) = a < b ? a : b";
-  let intrinsics = [ IntrinSelect<int_smin> ];
+  let intrinsics = [IntrinSelect<int_smin>];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
-  let overloads =
-      [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+  let overloads = [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def UMax :  DXILOp<39, binary> {
+def UMax : DXILOp<39, binary> {
   let Doc = "Unsigned integer maximum. UMax(a,b) = a > b ? a : b";
-  let intrinsics = [ IntrinSelect<int_umax> ];
+  let intrinsics = [IntrinSelect<int_umax>];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
-  let overloads =
-      [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+  let overloads = [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def UMin :  DXILOp<40, binary> {
+def UMin : DXILOp<40, binary> {
   let Doc = "Unsigned integer minimum. UMin(a,b) = a < b ? a : b";
-  let intrinsics = [ IntrinSelect<int_umin> ];
+  let intrinsics = [IntrinSelect<int_umin>];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
-  let overloads =
-      [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+  let overloads = [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def FMad :  DXILOp<46, tertiary> {
+def FMad : DXILOp<46, tertiary> {
   let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m "
             "* a + b.";
-  let intrinsics = [ IntrinSelect<int_fmuladd> ];
+  let intrinsics = [IntrinSelect<int_fmuladd>];
   let arguments = [OverloadTy, OverloadTy, OverloadTy];
   let result = OverloadTy;
-  let overloads =
-      [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy]>];
+  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def IMad :  DXILOp<48, tertiary> {
+def IMad : DXILOp<48, tertiary> {
   let Doc = "Signed integer arithmetic multiply/add operation. imad(m,a,b) = m "
             "* a + b.";
-  let intrinsics = [ IntrinSelect<int_dx_imad> ];
+  let intrinsics = [IntrinSelect<int_dx_imad>];
   let arguments = [OverloadTy, OverloadTy, OverloadTy];
   let result = OverloadTy;
-  let overloads =
-      [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+  let overloads = [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def UMad :  DXILOp<49, tertiary> {
+def UMad : DXILOp<49, tertiary> {
   let Doc = "Unsigned integer arithmetic multiply/add operation. umad(m,a, = m "
             "* a + b.";
-  let intrinsics = [ IntrinSelect<int_dx_umad> ];
+  let intrinsics = [IntrinSelect<int_dx_umad>];
   let arguments = [OverloadTy, OverloadTy, OverloadTy];
   let result = OverloadTy;
-  let overloads =
-      [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+  let overloads = [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Dot2 :  DXILOp<54, dot2> {
+def Dot2 : DXILOp<54, dot2> {
   let Doc = "dot product of two float vectors Dot(a,b) = a[0]*b[0] + ... + "
             "a[n]*b[n] where n is 0 to 1 inclusive";
-  let intrinsics = [ IntrinSelect<int_dx_dot2> ];
+  let intrinsics = [IntrinSelect<int_dx_dot2>];
   let arguments = !listsplat(OverloadTy, 4);
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -769,10 +758,10 @@ def Dot2 :  DXILOp<54, dot2> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Dot3 :  DXILOp<55, dot3> {
+def Dot3 : DXILOp<55, dot3> {
   let Doc = "dot product of two float vectors Dot(a,b) = a[0]*b[0] + ... + "
             "a[n]*b[n] where n is 0 to 2 inclusive";
-  let intrinsics = [ IntrinSelect<int_dx_dot3> ];
+  let intrinsics = [IntrinSelect<int_dx_dot3>];
   let arguments = !listsplat(OverloadTy, 6);
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -780,10 +769,10 @@ def Dot3 :  DXILOp<55, dot3> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def Dot4 :  DXILOp<56, dot4> {
+def Dot4 : DXILOp<56, dot4> {
   let Doc = "dot product of two float vectors Dot(a,b) = a[0]*b[0] + ... + "
             "a[n]*b[n] where n is 0 to 3 inclusive";
-  let intrinsics = [ IntrinSelect<int_dx_dot4> ];
+  let intrinsics = [IntrinSelect<int_dx_dot4>];
   let arguments = !listsplat(OverloadTy, 8);
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -844,9 +833,8 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> {
 def Barrier : DXILOp<80, barrier> {
   let Doc = "inserts a memory barrier in the shader";
   let intrinsics = [
-    IntrinSelect<
-        int_dx_group_memory_barrier_with_group_sync,
-        [ IntrinArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync> ]>,
+    IntrinSelect<int_dx_group_memory_barrier_with_group_sync,
+                 [IntrinArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync>]>,
   ];
 
   let arguments = [Int32Ty];
@@ -857,15 +845,15 @@ def Barrier : DXILOp<80, barrier> {
 
 def Discard : DXILOp<82, discard> {
   let Doc = "discard the current pixel";
-  let intrinsics = [ IntrinSelect<int_dx_discard> ];
+  let intrinsics = [IntrinSelect<int_dx_discard>];
   let arguments = [Int1Ty];
   let result = VoidTy;
   let stages = [Stages<DXIL1_0, [pixel]>];
 }
 
-def ThreadId :  DXILOp<93, threadId> {
+def ThreadId : DXILOp<93, threadId> {
   let Doc = "Reads the thread ID";
-  let intrinsics = [ IntrinSelect<int_dx_thread_id> ];
+  let intrinsics = [IntrinSelect<int_dx_thread_id>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
@@ -873,9 +861,9 @@ def ThreadId :  DXILOp<93, threadId> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def GroupId :  DXILOp<94, groupId> {
+def GroupId : DXILOp<94, groupId> {
   let Doc = "Reads the group ID (SV_GroupID)";
-  let intrinsics = [ IntrinSelect<int_dx_group_id> ];
+  let intrinsics = [IntrinSelect<int_dx_group_id>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
@@ -883,9 +871,9 @@ def GroupId :  DXILOp<94, groupId> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def ThreadIdInGroup :  DXILOp<95, threadIdInGroup> {
+def ThreadIdInGroup : DXILOp<95, threadIdInGroup> {
   let Doc = "Reads the thread ID within the group  (SV_GroupThreadID)";
-  let intrinsics = [ IntrinSelect<int_dx_thread_id_in_group> ];
+  let intrinsics = [IntrinSelect<int_dx_thread_id_in_group>];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
@@ -893,26 +881,26 @@ def ThreadIdInGroup :  DXILOp<95, threadIdInGroup> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def FlattenedThreadIdInGroup :  DXILOp<96, flattenedThreadIdInGroup> {
+def FlattenedThreadIdInGroup : DXILOp<96, flattenedThreadIdInGroup> {
   let Doc = "Provides a flattened index for a given thread within a given "
             "group (SV_GroupIndex)";
-  let intrinsics = [ IntrinSelect<int_dx_flattened_thread_id_in_group> ];
+  let intrinsics = [IntrinSelect<int_dx_flattened_thread_id_in_group>];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
   let stages = [Stages<DXIL1_0, [compute, mesh, amplification, node]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def MakeDouble :  DXILOp<101, makeDouble> {
+def MakeDouble : DXILOp<101, makeDouble> {
   let Doc = "creates a double value";
-  let intrinsics = [ IntrinSelect<int_dx_asdouble> ];
+  let intrinsics = [IntrinSelect<int_dx_asdouble>];
   let arguments = [Int32Ty, Int32Ty];
   let result = DoubleTy;
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def SplitDouble :  DXILOp<102, splitDouble> {
+def SplitDouble : DXILOp<102, splitDouble> {
   let Doc = "Splits a double into 2 uints";
   let arguments = [OverloadTy];
   let result = SplitDoubleTy;
@@ -921,9 +909,9 @@ def SplitDouble :  DXILOp<102, splitDouble> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def WaveIsFirstLane :  DXILOp<110, waveIsFirstLane> {
+def WaveIsFirstLane : DXILOp<110, waveIsFirstLane> {
   let Doc = "returns 1 for the first lane in the wave";
-  let intrinsics = [ IntrinSelect<int_dx_wave_is_first_lane> ];
+  let intrinsics = [IntrinSelect<int_dx_wave_is_first_lane>];
   let arguments = [];
   let result = Int1Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
@@ -931,7 +919,7 @@ def WaveIsFirstLane :  DXILOp<110, waveIsFirstLane> {
 
 def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
   let Doc = "returns the index of the current lane in the wave";
-  let intrinsics = [ IntrinSelect<int_dx_wave_getlaneindex> ];
+  let intrinsics = [IntrinSelect<int_dx_wave_getlaneindex>];
   let arguments = [];
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
@@ -939,25 +927,27 @@ def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
 }
 
 def WaveActiveAnyTrue : DXILOp<113, waveAnyTrue> {
-  let Doc = "returns true if the expression is true in any of the active lanes in the current wave";
-  let intrinsics = [ IntrinSelect<int_dx_wave_any> ];
+  let Doc = "returns true if the expression is true in any of the active lanes "
+            "in the current wave";
+  let intrinsics = [IntrinSelect<int_dx_wave_any>];
   let arguments = [Int1Ty];
   let result = Int1Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
 }
 
-def WaveReadLaneAt:  DXILOp<117, waveReadLaneAt> {
+def WaveReadLaneAt : DXILOp<117, waveReadLaneAt> {
   let Doc = "returns the value from the specified lane";
-  let intrinsics = [ IntrinSelect<int_dx_wave_readlane> ];
+  let intrinsics = [IntrinSelect<int_dx_wave_readlane>];
   let arguments = [OverloadTy, Int32Ty];
   let result = OverloadTy;
-  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy, Int1Ty, Int16Ty, Int32Ty, Int64Ty]>];
+  let overloads = [Overloads<
+      DXIL1_0, [HalfTy, FloatTy, DoubleTy, Int1Ty, Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
 }
 
 def WaveAllBitCount : DXILOp<135, waveAllOp> {
   let Doc = "returns the count of bits set to 1 across the wave";
-  let intrinsics = [ IntrinSelect<int_dx_wave_active_countbits> ];
+  let intrinsics = [IntrinSelect<int_dx_wave_active_countbits>];
   let arguments = [Int1Ty];
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
@@ -966,7 +956,7 @@ def WaveAllBitCount : DXILOp<135, waveAllOp> {
 def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
   let Doc = "signed dot product of 4 x i8 vectors packed into i32, with "
             "accumulate to i32";
-  let intrinsics = [ IntrinSelect<int_dx_dot4add_i8packed> ];
+  let intrinsics = [IntrinSelect<int_dx_dot4add_i8packed>];
   let arguments = [Int32Ty, Int32Ty, Int32Ty];
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
@@ -976,7 +966,7 @@ def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
 def Dot4AddU8Packed : DXILOp<164, dot4AddPacked> {
   let Doc = "unsigned dot product of 4 x i8 vectors packed into i32, with "
             "accumulate to i32";
-  let intrinsics = [ IntrinSelect<int_dx_dot4add_u8packed> ];
+  let intrinsics = [IntrinSelect<int_dx_dot4add_u8packed>];
   let arguments = [Int32Ty, Int32Ty, Int32Ty];
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];

>From b99955fa0f7b3897ccd684256a7d802338921ea3 Mon Sep 17 00:00:00 2001
From: Finn Plummer <canadienfinn at gmail.com>
Date: Mon, 16 Dec 2024 22:11:57 +0000
Subject: [PATCH 12/12] update `DXILProperty` uses

- update existing wave ops to specify the `IsWave` property
- update the `Barrier` op to specify the `IsBarrier` property
---
 llvm/lib/Target/DirectX/DXIL.td | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 852953a6b0df14..e7fd5ec3554c14 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -841,6 +841,7 @@ def Barrier : DXILOp<80, barrier> {
   let result = VoidTy;
   let stages = [Stages<DXIL1_0, [compute, library]>];
   let attributes = [Attributes<DXIL1_0, []>];
+  let properties = [IsBarrier];
 }
 
 def Discard : DXILOp<82, discard> {
@@ -915,6 +916,7 @@ def WaveIsFirstLane : DXILOp<110, waveIsFirstLane> {
   let arguments = [];
   let result = Int1Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let properties = [IsWave];
 }
 
 def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
@@ -924,6 +926,7 @@ def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
+  let properties = [IsWave];
 }
 
 def WaveActiveAnyTrue : DXILOp<113, waveAnyTrue> {
@@ -933,6 +936,7 @@ def WaveActiveAnyTrue : DXILOp<113, waveAnyTrue> {
   let arguments = [Int1Ty];
   let result = Int1Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let properties = [IsWave];
 }
 
 def WaveReadLaneAt : DXILOp<117, waveReadLaneAt> {
@@ -943,6 +947,7 @@ def WaveReadLaneAt : DXILOp<117, waveReadLaneAt> {
   let overloads = [Overloads<
       DXIL1_0, [HalfTy, FloatTy, DoubleTy, Int1Ty, Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let properties = [IsWave];
 }
 
 def WaveAllBitCount : DXILOp<135, waveAllOp> {
@@ -951,6 +956,7 @@ def WaveAllBitCount : DXILOp<135, waveAllOp> {
   let arguments = [Int1Ty];
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
+  let properties = [IsWave];
 }
 
 def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {