[llvm] [DXIL] Add GroupMemoryBarrierWithGroupSync intrinsic (PR #114349)

Adam Yang via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 1 17:03:10 PST 2024


https://github.com/adam-yang updated https://github.com/llvm/llvm-project/pull/114349

>From 0dcfae17dcdf91ad126797af30f83736e81a4fed Mon Sep 17 00:00:00 2001
From: Adam Yang <31109344+adam-yang at users.noreply.github.com>
Date: Tue, 8 Oct 2024 00:53:11 -0700
Subject: [PATCH 01/24] Added GroupMemoryBarrierWithGroupSync intrinsic for
 DXIL

---
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |  2 ++
 llvm/lib/Target/DirectX/DXIL.td               |  8 +++++
 llvm/lib/Target/DirectX/DXILConstants.h       |  7 +++++
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    | 31 +++++++++++++++++++
 .../GroupMemoryBarrierWithGroupSync.ll        |  8 +++++
 5 files changed, 56 insertions(+)
 create mode 100644 llvm/test/CodeGen/DirectX/GroupMemoryBarrierWithGroupSync.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index bf49ec6f6c6496..8cb5432d039ec7 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -107,4 +107,6 @@ def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>
 def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
 def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
+
+def int_dx_groupMemoryBarrierWithGroupSync : DefaultAttrsIntrinsic<[], [], []>;
 }
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 7cc08b2fe7cc4b..a5ea4a6ea718a9 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -278,6 +278,7 @@ def IsFeedback : DXILAttribute;
 def IsWave : DXILAttribute;
 def NeedsUniformInputs : DXILAttribute;
 def IsBarrier : DXILAttribute;
+def NoDuplicate : DXILAttribute;
 
 class Overloads<Version ver, list<DXILOpParamType> ols> {
   Version dxil_version = ver;
@@ -913,4 +914,11 @@ def WaveAllBitCount : DXILOp<135, waveAllOp> {
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+
+def Barrier : DXILOp<80, barrier> {
+  let Doc = "inserts a memory barrier in the shader";
+  let arguments = [Int32Ty];
+  let result = VoidTy;
+  let stages = [Stages<DXIL1_0, [compute, library]>];
+  let attributes = [Attributes<DXIL1_0, [NoDuplicate]>];
 }
diff --git a/llvm/lib/Target/DirectX/DXILConstants.h b/llvm/lib/Target/DirectX/DXILConstants.h
index 022cd57795a063..38984727761bb3 100644
--- a/llvm/lib/Target/DirectX/DXILConstants.h
+++ b/llvm/lib/Target/DirectX/DXILConstants.h
@@ -30,6 +30,13 @@ enum class OpParamType : unsigned {
 #include "DXILOperation.inc"
 };
 
+enum class BarrierMode : unsigned {
+  SyncThreadGroup = 0x00000001,
+  UAVFenceGlobal = 0x00000002,
+  UAVFenceThreadGroup = 0x00000004,
+  TGSMFence = 0x00000008,
+};
+
 } // namespace dxil
 } // namespace llvm
 
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index b5cc209493ed1f..29e404b1e3ef9a 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -260,6 +260,34 @@ class OpLowerer {
     });
   }
 
+  [[nodiscard]] bool lowerBarrier(Function& F, Intrinsic::ID IntrId) {
+    IRBuilder<> &IRB = OpBuilder.getIRB();
+    return replaceFunction(F, [&](CallInst *CI) -> Error {
+      unsigned BarrierMode = 0;
+      switch (IntrId) {
+      default:
+        report_fatal_error("Unhandled barrier operation type.");
+        break;
+      case Intrinsic::dx_groupMemoryBarrierWithGroupSync:
+        BarrierMode = (unsigned)dxil::BarrierMode::TGSMFence | (unsigned)dxil::BarrierMode::SyncThreadGroup;
+        break;
+      }
+
+      std::array<Value *, 1> Args{IRB.getInt32(BarrierMode)};
+
+      IRB.SetInsertPoint(CI);
+      Expected<CallInst *> OpCall =
+          OpBuilder.tryCreateOp(OpCode::Barrier, Args, CI->getName());
+      if (Error E = OpCall.takeError())
+        return E;
+
+      CI->replaceAllUsesWith(OpCall.get());
+      CI->eraseFromParent();
+
+      return Error::success();
+    });
+  }
+
   [[nodiscard]] bool lowerToBindAndAnnotateHandle(Function &F) {
     IRBuilder<> &IRB = OpBuilder.getIRB();
     Type *Int32Ty = IRB.getInt32Ty();
@@ -646,6 +674,9 @@ class OpLowerer {
     HasErrors |= replaceFunctionWithOp(F, OpCode);                             \
     break;
 #include "DXILOperation.inc"
+      case Intrinsic::dx_groupMemoryBarrierWithGroupSync:
+        HasErrors |= lowerBarrier(F, ID);
+        break;
       case Intrinsic::dx_handle_fromBinding:
         HasErrors |= lowerHandleFromBinding(F);
         break;
diff --git a/llvm/test/CodeGen/DirectX/GroupMemoryBarrierWithGroupSync.ll b/llvm/test/CodeGen/DirectX/GroupMemoryBarrierWithGroupSync.ll
new file mode 100644
index 00000000000000..a99c6757814f3b
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/GroupMemoryBarrierWithGroupSync.ll
@@ -0,0 +1,8 @@
+; RUN: opt -S  -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
+
+define void @test_group_memory_barrier_with_group_sync() {
+entry:
+  ; CHECK: call void @dx.op.barrier(i32 80, i32 9)
+  call void @llvm.dx.groupMemoryBarrierWithGroupSync()
+  ret void
+}
\ No newline at end of file

>From 901aef504d8c4f86856aaccb10720fdc9e66fcf6 Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Thu, 10 Oct 2024 12:25:26 -0700
Subject: [PATCH 02/24] Changed naming convention and fixed formatting

---
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |  2 +-
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    | 23 ++++++++-----------
 ...> group_memory_barrier_with_group_sync.ll} |  2 +-
 3 files changed, 12 insertions(+), 15 deletions(-)
 rename llvm/test/CodeGen/DirectX/{GroupMemoryBarrierWithGroupSync.ll => group_memory_barrier_with_group_sync.ll} (80%)

diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 8cb5432d039ec7..0d24790b4ad58d 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -108,5 +108,5 @@ def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
 def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 
-def int_dx_groupMemoryBarrierWithGroupSync : DefaultAttrsIntrinsic<[], [], []>;
+def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
 }
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 29e404b1e3ef9a..820725cbd45096 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -260,19 +260,14 @@ class OpLowerer {
     });
   }
 
-  [[nodiscard]] bool lowerBarrier(Function& F, Intrinsic::ID IntrId) {
+  [[nodiscard]] bool lowerBarrier(Function &F, Intrinsic::ID IntrId,
+                                  ArrayRef<dxil::BarrierMode> BarrierModes) {
+    unsigned BarrierMode = 0;
+    for (const dxil::BarrierMode B : BarrierModes) {
+      BarrierMode |= (unsigned)B;
+    }
     IRBuilder<> &IRB = OpBuilder.getIRB();
     return replaceFunction(F, [&](CallInst *CI) -> Error {
-      unsigned BarrierMode = 0;
-      switch (IntrId) {
-      default:
-        report_fatal_error("Unhandled barrier operation type.");
-        break;
-      case Intrinsic::dx_groupMemoryBarrierWithGroupSync:
-        BarrierMode = (unsigned)dxil::BarrierMode::TGSMFence | (unsigned)dxil::BarrierMode::SyncThreadGroup;
-        break;
-      }
-
       std::array<Value *, 1> Args{IRB.getInt32(BarrierMode)};
 
       IRB.SetInsertPoint(CI);
@@ -674,8 +669,10 @@ class OpLowerer {
     HasErrors |= replaceFunctionWithOp(F, OpCode);                             \
     break;
 #include "DXILOperation.inc"
-      case Intrinsic::dx_groupMemoryBarrierWithGroupSync:
-        HasErrors |= lowerBarrier(F, ID);
+      case Intrinsic::dx_group_memory_barrier_with_group_sync:
+        HasErrors |= lowerBarrier(
+            F, ID,
+            {dxil::BarrierMode::TGSMFence, dxil::BarrierMode::SyncThreadGroup});
         break;
       case Intrinsic::dx_handle_fromBinding:
         HasErrors |= lowerHandleFromBinding(F);
diff --git a/llvm/test/CodeGen/DirectX/GroupMemoryBarrierWithGroupSync.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
similarity index 80%
rename from llvm/test/CodeGen/DirectX/GroupMemoryBarrierWithGroupSync.ll
rename to llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
index a99c6757814f3b..48907647c660f8 100644
--- a/llvm/test/CodeGen/DirectX/GroupMemoryBarrierWithGroupSync.ll
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
@@ -3,6 +3,6 @@
 define void @test_group_memory_barrier_with_group_sync() {
 entry:
   ; CHECK: call void @dx.op.barrier(i32 80, i32 9)
-  call void @llvm.dx.groupMemoryBarrierWithGroupSync()
+  call void @llvm.dx.group.memory.barrier.with.group.sync()
   ret void
 }
\ No newline at end of file

>From 42ba53b3b7bef695b4e67447c6fc07d72542b1f8 Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Thu, 10 Oct 2024 12:59:52 -0700
Subject: [PATCH 03/24] Got rid of the noduplicate attr

---
 llvm/lib/Target/DirectX/DXIL.td | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index a5ea4a6ea718a9..c047d55502a4b2 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -278,7 +278,6 @@ def IsFeedback : DXILAttribute;
 def IsWave : DXILAttribute;
 def NeedsUniformInputs : DXILAttribute;
 def IsBarrier : DXILAttribute;
-def NoDuplicate : DXILAttribute;
 
 class Overloads<Version ver, list<DXILOpParamType> ols> {
   Version dxil_version = ver;
@@ -920,5 +919,5 @@ def Barrier : DXILOp<80, barrier> {
   let arguments = [Int32Ty];
   let result = VoidTy;
   let stages = [Stages<DXIL1_0, [compute, library]>];
-  let attributes = [Attributes<DXIL1_0, [NoDuplicate]>];
+  let attributes = [Attributes<DXIL1_0, []>];
 }

>From 746d639b2392f66d03a3303eba3ad4c70e29b4f0 Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Thu, 10 Oct 2024 13:03:52 -0700
Subject: [PATCH 04/24] Shader model changed to 6.0

---
 .../CodeGen/DirectX/group_memory_barrier_with_group_sync.ll     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
index 48907647c660f8..7cacbe778ac952 100644
--- a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S  -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
+; RUN: opt -S  -dxil-op-lower -mtriple=dxil-pc-shadermodel6.0-library < %s | FileCheck %s --check-prefix=CHECK
 
 define void @test_group_memory_barrier_with_group_sync() {
 entry:

>From 39f40ffb5d5d6e416d13815909e6bec5f78feeec Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Thu, 10 Oct 2024 13:07:03 -0700
Subject: [PATCH 05/24] Fixed the incorrect arguments list

---
 llvm/lib/Target/DirectX/DXIL.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index c047d55502a4b2..7a66319b44fc57 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -916,7 +916,7 @@ def WaveAllBitCount : DXILOp<135, waveAllOp> {
 
 def Barrier : DXILOp<80, barrier> {
   let Doc = "inserts a memory barrier in the shader";
-  let arguments = [Int32Ty];
+  let arguments = [];
   let result = VoidTy;
   let stages = [Stages<DXIL1_0, [compute, library]>];
   let attributes = [Attributes<DXIL1_0, []>];

>From 8bd228c2d6c5ac965a07f76fb35ee7a98360db59 Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Thu, 10 Oct 2024 13:13:21 -0700
Subject: [PATCH 06/24] Revert "Shader model changed to 6.0"

This reverts commit c7d83cf8ae32d98b0677ff1a88f74fe4827dd61f.
---
 .../CodeGen/DirectX/group_memory_barrier_with_group_sync.ll     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
index 7cacbe778ac952..48907647c660f8 100644
--- a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S  -dxil-op-lower -mtriple=dxil-pc-shadermodel6.0-library < %s | FileCheck %s --check-prefix=CHECK
+; RUN: opt -S  -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
 
 define void @test_group_memory_barrier_with_group_sync() {
 entry:

>From e5c71ca514d7807c12b66291c5eb73620a4ff44f Mon Sep 17 00:00:00 2001
From: Adam Yang <31109344+adam-yang at users.noreply.github.com>
Date: Tue, 15 Oct 2024 15:56:50 -0700
Subject: [PATCH 07/24] Added another intermediate memory barrier intrinsic
 that maps directly to the barrier dxil op

---
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |  1 +
 llvm/lib/Target/DirectX/DXIL.td               |  3 +-
 .../Target/DirectX/DXILIntrinsicExpansion.cpp | 26 +++++++++++++++++
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    | 28 -------------------
 .../group_memory_barrier_with_group_sync.ll   |  2 +-
 5 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 0d24790b4ad58d..68638a6aa6dc6e 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -108,5 +108,6 @@ def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
 def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 
+def int_dx_memory_barrier : DefaultAttrsIntrinsic<[], [llvm_i32_ty], []>;
 def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
 }
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 7a66319b44fc57..cb9bf305e32d44 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -916,7 +916,8 @@ def WaveAllBitCount : DXILOp<135, waveAllOp> {
 
 def Barrier : DXILOp<80, barrier> {
   let Doc = "inserts a memory barrier in the shader";
-  let arguments = [];
+  let LLVMIntrinsic = int_dx_memory_barrier;
+  let arguments = [Int32Ty];
   let result = VoidTy;
   let stages = [Stages<DXIL1_0, [compute, library]>];
   let attributes = [Attributes<DXIL1_0, []>];
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index d2bfca1fada559..10d0f25c8db923 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -10,6 +10,7 @@
 //  opcodes in DirectX Intermediate Language (DXIL).
 //===----------------------------------------------------------------------===//
 
+#include "DXILConstants.h"
 #include "DXILIntrinsicExpansion.h"
 #include "DirectX.h"
 #include "llvm/ADT/STLExtras.h"
@@ -67,6 +68,7 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_sign:
   case Intrinsic::dx_step:
   case Intrinsic::dx_radians:
+  case Intrinsic::dx_group_memory_barrier_with_group_sync:
     return true;
   }
   return false;
@@ -453,6 +455,27 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
   return Builder.CreateFMul(X, PiOver180);
 }
 
+static Value *expandMemoryBarrier(CallInst *Orig, Intrinsic::ID IntrinsicId) {
+  assert(IntrinsicId == Intrinsic::dx_group_memory_barrier_with_group_sync);
+  unsigned BarrierMode = 0;
+  switch (IntrinsicId) {
+  case Intrinsic::dx_group_memory_barrier_with_group_sync:
+    BarrierMode = (unsigned)dxil::BarrierMode::TGSMFence |
+                  (unsigned)dxil::BarrierMode::SyncThreadGroup;
+    break;
+  default:
+    report_fatal_error(Twine("Unexpected memory barrier intrinsic."),
+                       /* gen_crash_diag=*/false);
+    break;
+  }
+
+  IRBuilder<> Builder(Orig);
+  return Builder.CreateIntrinsic(
+      Builder.getVoidTy(), Intrinsic::dx_memory_barrier,
+      ArrayRef<Value *>{Builder.getInt32(BarrierMode)}, nullptr,
+      Orig->getName());
+}
+
 static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic) {
   if (ClampIntrinsic == Intrinsic::dx_uclamp)
     return Intrinsic::umax;
@@ -580,6 +603,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_radians:
     Result = expandRadiansIntrinsic(Orig);
     break;
+  case Intrinsic::dx_group_memory_barrier_with_group_sync:
+    Result = expandMemoryBarrier(Orig, IntrinsicId);
+    break;
   }
   if (Result) {
     Orig->replaceAllUsesWith(Result);
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 820725cbd45096..b5cc209493ed1f 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -260,29 +260,6 @@ class OpLowerer {
     });
   }
 
-  [[nodiscard]] bool lowerBarrier(Function &F, Intrinsic::ID IntrId,
-                                  ArrayRef<dxil::BarrierMode> BarrierModes) {
-    unsigned BarrierMode = 0;
-    for (const dxil::BarrierMode B : BarrierModes) {
-      BarrierMode |= (unsigned)B;
-    }
-    IRBuilder<> &IRB = OpBuilder.getIRB();
-    return replaceFunction(F, [&](CallInst *CI) -> Error {
-      std::array<Value *, 1> Args{IRB.getInt32(BarrierMode)};
-
-      IRB.SetInsertPoint(CI);
-      Expected<CallInst *> OpCall =
-          OpBuilder.tryCreateOp(OpCode::Barrier, Args, CI->getName());
-      if (Error E = OpCall.takeError())
-        return E;
-
-      CI->replaceAllUsesWith(OpCall.get());
-      CI->eraseFromParent();
-
-      return Error::success();
-    });
-  }
-
   [[nodiscard]] bool lowerToBindAndAnnotateHandle(Function &F) {
     IRBuilder<> &IRB = OpBuilder.getIRB();
     Type *Int32Ty = IRB.getInt32Ty();
@@ -669,11 +646,6 @@ class OpLowerer {
     HasErrors |= replaceFunctionWithOp(F, OpCode);                             \
     break;
 #include "DXILOperation.inc"
-      case Intrinsic::dx_group_memory_barrier_with_group_sync:
-        HasErrors |= lowerBarrier(
-            F, ID,
-            {dxil::BarrierMode::TGSMFence, dxil::BarrierMode::SyncThreadGroup});
-        break;
       case Intrinsic::dx_handle_fromBinding:
         HasErrors |= lowerHandleFromBinding(F);
         break;
diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
index 48907647c660f8..c43625755d6efc 100644
--- a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S  -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
+; RUN: opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
 
 define void @test_group_memory_barrier_with_group_sync() {
 entry:

>From e6ec07b5cb7ebaef7b8f8ed626ec2b193d0c2633 Mon Sep 17 00:00:00 2001
From: Adam Yang <31109344+adam-yang at users.noreply.github.com>
Date: Tue, 15 Oct 2024 16:27:13 -0700
Subject: [PATCH 08/24] Format

---
 llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 10d0f25c8db923..c23461c0ef8183 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -10,8 +10,8 @@
 //  opcodes in DirectX Intermediate Language (DXIL).
 //===----------------------------------------------------------------------===//
 
-#include "DXILConstants.h"
 #include "DXILIntrinsicExpansion.h"
+#include "DXILConstants.h"
 #include "DirectX.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"

>From 107c1db9405dc9292aacff69065f4b8e568ef9fe Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Wed, 16 Oct 2024 17:48:25 -0700
Subject: [PATCH 09/24] Tablegen simple: select the barrier intrinsic via intrinsic_selects

---
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |  1 -
 llvm/lib/Target/DirectX/DXIL.td               | 14 +++-
 .../Target/DirectX/DXILIntrinsicExpansion.cpp |  5 --
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    | 25 +++----
 .../group_memory_barrier_with_group_sync.ll   |  2 +-
 llvm/utils/TableGen/DXILEmitter.cpp           | 66 +++++++++++++++----
 6 files changed, 80 insertions(+), 33 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 68638a6aa6dc6e..0d24790b4ad58d 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -108,6 +108,5 @@ def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
 def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 
-def int_dx_memory_barrier : DefaultAttrsIntrinsic<[], [llvm_i32_ty], []>;
 def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
 }
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index cb9bf305e32d44..593d87d35f13d0 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -294,6 +294,11 @@ class Attributes<Version ver = DXIL1_0, list<DXILAttribute> attrs> {
   list<DXILAttribute> op_attrs = attrs;
 }
 
+class IntrinsicSelect<Intrinsic intr, list<string> extra_args> {
+  Intrinsic Intr = intr;
+  list<string> ExtraArgs = extra_args;
+}
+
 // Abstraction DXIL Operation
 class DXILOp<int opcode, DXILOpClass opclass> {
   // A short description of the operation
@@ -308,6 +313,8 @@ class DXILOp<int opcode, DXILOpClass opclass> {
   // LLVM Intrinsic DXIL Operation maps to
   Intrinsic LLVMIntrinsic = ?;
 
+  list<IntrinsicSelect> intrinsic_selects = [];
+
   // Result type of the op
   DXILOpParamType result;
 
@@ -916,7 +923,12 @@ def WaveAllBitCount : DXILOp<135, waveAllOp> {
 
 def Barrier : DXILOp<80, barrier> {
   let Doc = "inserts a memory barrier in the shader";
-  let LLVMIntrinsic = int_dx_memory_barrier;
+  let intrinsic_selects = [
+    IntrinsicSelect<
+        int_dx_group_memory_barrier_with_group_sync,
+        [ "OpBuilder.getIRB().getInt32((unsigned)BarrierMode::SyncThreadGroup | (unsigned)BarrierMode::TGSMFence)" ]>,
+  ];
+
   let arguments = [Int32Ty];
   let result = VoidTy;
   let stages = [Stages<DXIL1_0, [compute, library]>];
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index c23461c0ef8183..3fdfbaa659bd88 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "DXILIntrinsicExpansion.h"
-#include "DXILConstants.h"
 #include "DirectX.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
@@ -68,7 +67,6 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_sign:
   case Intrinsic::dx_step:
   case Intrinsic::dx_radians:
-  case Intrinsic::dx_group_memory_barrier_with_group_sync:
     return true;
   }
   return false;
@@ -603,9 +601,6 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_radians:
     Result = expandRadiansIntrinsic(Orig);
     break;
-  case Intrinsic::dx_group_memory_barrier_with_group_sync:
-    Result = expandMemoryBarrier(Orig, IntrinsicId);
-    break;
   }
   if (Result) {
     Orig->replaceAllUsesWith(Result);
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index b5cc209493ed1f..9d51c28ca28f93 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -106,20 +106,21 @@ class OpLowerer {
     return false;
   }
 
-  [[nodiscard]]
-  bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp) {
+  [[nodiscard]] bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
+                                           ArrayRef<Value *> ExtraArgs) {
     bool IsVectorArgExpansion = isVectorArgExpansion(F);
     return replaceFunction(F, [&](CallInst *CI) -> Error {
-      SmallVector<Value *> Args;
-      OpBuilder.getIRB().SetInsertPoint(CI);
+      SmallVector<Value *> NewArgs;
       if (IsVectorArgExpansion) {
-        SmallVector<Value *> NewArgs = argVectorFlatten(CI, OpBuilder.getIRB());
-        Args.append(NewArgs.begin(), NewArgs.end());
+        NewArgs = argVectorFlatten(CI, OpBuilder.getIRB());
       } else
-        Args.append(CI->arg_begin(), CI->arg_end());
+        NewArgs.append(CI->arg_begin(), CI->arg_end());
 
-      Expected<CallInst *> OpCall =
-          OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), F.getReturnType());
+      NewArgs.append(ExtraArgs.begin(), ExtraArgs.end());
+
+      OpBuilder.getIRB().SetInsertPoint(CI);
+      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
+          DXILOp, NewArgs, CI->getName(), F.getReturnType());
       if (Error E = OpCall.takeError())
         return E;
 
@@ -641,9 +642,9 @@ class OpLowerer {
       switch (ID) {
       default:
         continue;
-#define DXIL_OP_INTRINSIC(OpCode, Intrin)                                      \
-  case Intrin:                                                                 \
-    HasErrors |= replaceFunctionWithOp(F, OpCode);                             \
+#define DXIL_OP_INTRINSIC(OpCode, Intrin, ExtraArgs)                         \
+  case Intrin:                                                               \
+    HasErrors |= replaceFunctionWithOp(F, OpCode, ExtraArgs);                \
     break;
 #include "DXILOperation.inc"
       case Intrinsic::dx_handle_fromBinding:
diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
index c43625755d6efc..cd588d464e302b 100644
--- a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
 
 define void @test_group_memory_barrier_with_group_sync() {
 entry:
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index e74fc00015b404..5ab6b0edd02ab2 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -32,6 +32,11 @@ using namespace llvm::dxil;
 
 namespace {
 
+struct DXILIntrinsicSelect {
+  StringRef Intrinsic;
+  SmallVector<StringRef, 4> ExtraArgs;
+};
+
 struct DXILOperationDesc {
   std::string OpName; // name of DXIL operation
   int OpCode;         // ID of DXIL operation
@@ -42,8 +47,7 @@ struct DXILOperationDesc {
   SmallVector<const Record *> OverloadRecs;
   SmallVector<const Record *> StageRecs;
   SmallVector<const Record *> AttrRecs;
-  StringRef Intrinsic; // The llvm intrinsic map to OpName. Default is "" which
-                       // means no map exists
+  SmallVector<DXILIntrinsicSelect> IntrinsicSelects;
   SmallVector<StringRef, 4>
       ShaderStages; // shader stages to which this applies, empty for all.
   int OverloadParamIndex;             // Index of parameter with overload type.
@@ -157,14 +161,43 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
                            OpName);
   }
 
-  const RecordVal *RV = R->getValue("LLVMIntrinsic");
-  if (RV && RV->getValue()) {
-    if (const DefInit *DI = dyn_cast<DefInit>(RV->getValue())) {
-      auto *IntrinsicDef = DI->getDef();
-      auto DefName = IntrinsicDef->getName();
-      assert(DefName.starts_with("int_") && "invalid intrinsic name");
-      // Remove the int_ from intrinsic name.
-      Intrinsic = DefName.substr(4);
+  auto GetIntrinsicName = [](const RecordVal *RV) -> StringRef {
+    if (RV && RV->getValue()) {
+      if (const DefInit *DI = dyn_cast<DefInit>(RV->getValue())) {
+        auto *IntrinsicDef = DI->getDef();
+        auto DefName = IntrinsicDef->getName();
+        assert(DefName.starts_with("int_") && "invalid intrinsic name");
+        // Remove the int_ from intrinsic name.
+        return DefName.substr(4);
+      }
+    }
+    return "";
+  };
+
+  {
+    DXILIntrinsicSelect IntrSelect;
+    IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("LLVMIntrinsic"));
+    if (IntrSelect.Intrinsic.size())
+      IntrinsicSelects.emplace_back(std::move(IntrSelect));
+  }
+
+  Recs = R->getValueAsListOfDefs("intrinsic_selects");
+  if (Recs.size()) {
+    if (IntrinsicSelects.size()) {
+      PrintFatalError(R,
+                      Twine("LLVMIntrinsic and intrinsic_match cannot be both "
+                            "defined for DXIL operation - ") +
+                          OpName);
+    } else {
+      for (const Record *R : Recs) {
+        DXILIntrinsicSelect IntrSelect;
+        IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("Intr"));
+        auto ExtraArgs = R->getValueAsListOfStrings("ExtraArgs");
+        for (StringRef Arg : ExtraArgs) {
+          IntrSelect.ExtraArgs.push_back(Arg);
+        }
+        IntrinsicSelects.emplace_back(std::move(IntrSelect));
+      }
     }
   }
 }
@@ -377,10 +410,17 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
   OS << "#ifdef DXIL_OP_INTRINSIC\n";
   OS << "\n";
   for (const auto &Op : Ops) {
-    if (Op.Intrinsic.empty())
+    if (Op.IntrinsicSelects.empty()) {
       continue;
-    OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName
-       << ", Intrinsic::" << Op.Intrinsic << ")\n";
+    }
+    for (const DXILIntrinsicSelect &MappedIntr : Op.IntrinsicSelects) {
+      OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName
+         << ", Intrinsic::" << MappedIntr.Intrinsic << ", (ArrayRef<Value *> {";
+      for (const StringRef &Arg : MappedIntr.ExtraArgs) {
+        OS << Arg << ", ";
+      }
+      OS << "}))\n";
+    }
   }
   OS << "\n";
   OS << "#undef DXIL_OP_INTRINSIC\n";

>From 1b379136dbb4eb13f14bdae3f44901f0fb818e90 Mon Sep 17 00:00:00 2001
From: Adam Yang <31109344+adam-yang at users.noreply.github.com>
Date: Mon, 21 Oct 2024 10:53:23 -0700
Subject: [PATCH 10/24] All working now: typed TableGen args replace string extra args

---
 llvm/lib/Target/DirectX/DXIL.td            |  40 +++++++-
 llvm/lib/Target/DirectX/DXILConstants.h    |   7 --
 llvm/lib/Target/DirectX/DXILOpLowering.cpp |  44 +++++++--
 llvm/utils/TableGen/DXILEmitter.cpp        | 104 +++++++++++++++------
 4 files changed, 146 insertions(+), 49 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 593d87d35f13d0..1175b7a7aa8846 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -294,9 +294,41 @@ class Attributes<Version ver = DXIL1_0, list<DXILAttribute> attrs> {
   list<DXILAttribute> op_attrs = attrs;
 }
 
-class IntrinsicSelect<Intrinsic intr, list<string> extra_args> {
-  Intrinsic Intr = intr;
-  list<string> ExtraArgs = extra_args;
+class DXILConstant<int value_> {
+  int value = value_;
+}
+
+defset list<DXILConstant> BarrierModes = {
+  def BarrierMode_DeviceMemoryBarrier              : DXILConstant<2>;
+  def BarrierMode_DeviceMemoryBarrierWithGroupSync : DXILConstant<3>;
+  def BarrierMode_GroupMemoryBarrier               : DXILConstant<8>;
+  def BarrierMode_GroupMemoryBarrierWithGroupSync  : DXILConstant<9>;
+  def BarrierMode_AllMemoryBarrier                 : DXILConstant<10>;
+  def BarrierMode_AllMemoryBarrierWithGroupSync    : DXILConstant<11>;
+}
+
+// Intrinsic arg selection
+class Arg {
+  int index = -1;
+  DXILConstant value;
+  bit is_i8 = 0;
+  bit is_i32 = 0;
+}
+class ArgSelect<int index_> : Arg {
+  let index = index_;
+}
+class ArgI32<DXILConstant value_> : Arg {
+  let value = value_;
+  let is_i32 = 1;
+}
+class ArgI8<DXILConstant value_> : Arg {
+  let value = value_;
+  let is_i8 = 1;
+}
+
+class IntrinsicSelect<Intrinsic intrinsic_, list<Arg> args_=?> {
+  Intrinsic intrinsic = intrinsic_;
+  list<Arg> args = args_;
 }
 
 // Abstraction DXIL Operation
@@ -926,7 +958,7 @@ def Barrier : DXILOp<80, barrier> {
   let intrinsic_selects = [
     IntrinsicSelect<
         int_dx_group_memory_barrier_with_group_sync,
-        [ "OpBuilder.getIRB().getInt32((unsigned)BarrierMode::SyncThreadGroup | (unsigned)BarrierMode::TGSMFence)" ]>,
+        [ ArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync> ]>,
   ];
 
   let arguments = [Int32Ty];
diff --git a/llvm/lib/Target/DirectX/DXILConstants.h b/llvm/lib/Target/DirectX/DXILConstants.h
index 38984727761bb3..022cd57795a063 100644
--- a/llvm/lib/Target/DirectX/DXILConstants.h
+++ b/llvm/lib/Target/DirectX/DXILConstants.h
@@ -30,13 +30,6 @@ enum class OpParamType : unsigned {
 #include "DXILOperation.inc"
 };
 
-enum class BarrierMode : unsigned {
-  SyncThreadGroup = 0x00000001,
-  UAVFenceGlobal = 0x00000002,
-  UAVFenceThreadGroup = 0x00000004,
-  TGSMFence = 0x00000008,
-};
-
 } // namespace dxil
 } // namespace llvm
 
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 9d51c28ca28f93..d2d0f11b756038 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -106,19 +106,44 @@ class OpLowerer {
     return false;
   }
 
+  struct Arg {
+    enum class Type {
+      Index,
+      I8,
+      I32,
+    };
+    Type Type = Type::Index;
+    int Value = -1;
+  };
+
   [[nodiscard]] bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
-                                           ArrayRef<Value *> ExtraArgs) {
+                                           ArrayRef<Arg> Args) {
     bool IsVectorArgExpansion = isVectorArgExpansion(F);
     return replaceFunction(F, [&](CallInst *CI) -> Error {
+      OpBuilder.getIRB().SetInsertPoint(CI);
       SmallVector<Value *> NewArgs;
-      if (IsVectorArgExpansion) {
+      if (Args.size()) {
+        for (const Arg &A : Args) {
+          switch (A.Type) {
+          case Arg::Type::Index:
+            NewArgs.push_back(CI->getArgOperand(A.Value));
+            break;
+          case Arg::Type::I8:
+            NewArgs.push_back(OpBuilder.getIRB().getInt8((uint8_t)A.Value));
+            break;
+          case Arg::Type::I32:
+            NewArgs.push_back(OpBuilder.getIRB().getInt32(A.Value));
+            break;
+          default:
+            llvm_unreachable("Invalid type of intrinsic arg.");
+          }
+        }
+      } else if (IsVectorArgExpansion) {
         NewArgs = argVectorFlatten(CI, OpBuilder.getIRB());
-      } else
+      } else {
         NewArgs.append(CI->arg_begin(), CI->arg_end());
+      }
 
-      NewArgs.append(ExtraArgs.begin(), ExtraArgs.end());
-
-      OpBuilder.getIRB().SetInsertPoint(CI);
       Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
           DXILOp, NewArgs, CI->getName(), F.getReturnType());
       if (Error E = OpCall.takeError())
@@ -642,9 +667,10 @@ class OpLowerer {
       switch (ID) {
       default:
         continue;
-#define DXIL_OP_INTRINSIC(OpCode, Intrin, ExtraArgs)                         \
-  case Intrin:                                                               \
-    HasErrors |= replaceFunctionWithOp(F, OpCode, ExtraArgs);                \
+#define DXIL_OP_INTRINSIC(OpCode, Intrin, ...)                                   \
+  case Intrin:                                                                   \
+    HasErrors |= replaceFunctionWithOp(F, OpCode,                                \
+                                       ArrayRef<Arg>{ __VA_ARGS__ });            \
     break;
 #include "DXILOperation.inc"
       case Intrinsic::dx_handle_fromBinding:
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 5ab6b0edd02ab2..06f5f89615c6f3 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -32,9 +32,18 @@ using namespace llvm::dxil;
 
 namespace {
 
+struct DXILArgSelect {
+  enum class Type {
+    Index,
+    I32,
+    I8,
+  };
+  Type Type = Type::Index;
+  int Value = 0;
+};
 struct DXILIntrinsicSelect {
   StringRef Intrinsic;
-  SmallVector<StringRef, 4> ExtraArgs;
+  SmallVector<DXILArgSelect, 4> Args;
 };
 
 struct DXILOperationDesc {
@@ -75,6 +84,21 @@ static void ascendingSortByVersion(std::vector<const Record *> &Recs) {
   });
 }
 
+/// Take a `int_{intrinsic_name}` and return just the intrinsic_name part if available.
+/// Otherwise return the empty string.
+static StringRef GetIntrinsicName(const RecordVal *RV){
+  if (RV && RV->getValue()) {
+    if (const DefInit *DI = dyn_cast<DefInit>(RV->getValue())) {
+      auto *IntrinsicDef = DI->getDef();
+      auto DefName = IntrinsicDef->getName();
+      assert(DefName.starts_with("int_") && "invalid intrinsic name");
+      // Remove the int_ from intrinsic name.
+      return DefName.substr(4);
+    }
+  }
+  return "";
+}
+
 /// Construct an object using the DXIL Operation records specified
 /// in DXIL.td. This serves as the single source of reference of
 /// the information extracted from the specified Record R, for
@@ -161,19 +185,6 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
                            OpName);
   }
 
-  auto GetIntrinsicName = [](const RecordVal *RV) -> StringRef {
-    if (RV && RV->getValue()) {
-      if (const DefInit *DI = dyn_cast<DefInit>(RV->getValue())) {
-        auto *IntrinsicDef = DI->getDef();
-        auto DefName = IntrinsicDef->getName();
-        assert(DefName.starts_with("int_") && "invalid intrinsic name");
-        // Remove the int_ from intrinsic name.
-        return DefName.substr(4);
-      }
-    }
-    return "";
-  };
-
   {
     DXILIntrinsicSelect IntrSelect;
     IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("LLVMIntrinsic"));
@@ -181,20 +192,43 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
       IntrinsicSelects.emplace_back(std::move(IntrSelect));
   }
 
-  Recs = R->getValueAsListOfDefs("intrinsic_selects");
-  if (Recs.size()) {
+  auto IntrinsicSelectRecords = R->getValueAsListOfDefs("intrinsic_selects");
+  if (IntrinsicSelectRecords.size()) {
     if (IntrinsicSelects.size()) {
-      PrintFatalError(R,
-                      Twine("LLVMIntrinsic and intrinsic_match cannot be both "
-                            "defined for DXIL operation - ") +
-                          OpName);
+      PrintFatalError(
+          R, Twine("LLVMIntrinsic and intrinsic_selects cannot be both "
+                   "defined for DXIL operation - ") +
+                 OpName);
     } else {
-      for (const Record *R : Recs) {
+      for (const Record *R : IntrinsicSelectRecords) {
         DXILIntrinsicSelect IntrSelect;
-        IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("Intr"));
-        auto ExtraArgs = R->getValueAsListOfStrings("ExtraArgs");
-        for (StringRef Arg : ExtraArgs) {
-          IntrSelect.ExtraArgs.push_back(Arg);
+        IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("intrinsic"));
+        auto Args = R->getValueAsListOfDefs("args");
+        for (const Record *Arg : Args) {
+          bool IsI8 = Arg->getValueAsBit("is_i8");
+          bool IsI32 = Arg->getValueAsBit("is_i32");
+          int Index = Arg->getValueAsInt("index");
+          const Record *ValueRec = Arg->getValueAsDef("value");
+
+          DXILArgSelect ArgSelect;
+          if (IsI8) {
+            ArgSelect.Type = DXILArgSelect::Type::I8;
+            ArgSelect.Value = ValueRec->getValueAsInt("value");
+          } else if (IsI32) {
+            ArgSelect.Type = DXILArgSelect::Type::I32;
+            ArgSelect.Value = ValueRec->getValueAsInt("value");
+          } else {
+            if (Index < 0) {
+              PrintFatalError(
+                  R, Twine("Index in ArgSelect<index> must be equal to or "
+                           "greater than 0 for DXIL operation - ") +
+                         OpName);
+            }
+            ArgSelect.Type = DXILArgSelect::Type::Index;
+            ArgSelect.Value = Index;
+          }
+
+          IntrSelect.Args.emplace_back(std::move(ArgSelect));
         }
         IntrinsicSelects.emplace_back(std::move(IntrSelect));
       }
@@ -415,11 +449,23 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
     }
     for (const DXILIntrinsicSelect &MappedIntr : Op.IntrinsicSelects) {
       OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName
-         << ", Intrinsic::" << MappedIntr.Intrinsic << ", (ArrayRef<Value *> {";
-      for (const StringRef &Arg : MappedIntr.ExtraArgs) {
-        OS << Arg << ", ";
+         << ", Intrinsic::" << MappedIntr.Intrinsic;
+      for (const DXILArgSelect &ArgSelect : MappedIntr.Args) {
+        OS << ", (Arg { ";
+        switch (ArgSelect.Type) {
+        case DXILArgSelect::Type::Index:
+          OS << "Arg::Type::Index, ";
+          break;
+        case DXILArgSelect::Type::I8:
+          OS << "Arg::Type::I8, ";
+          break;
+        case DXILArgSelect::Type::I32:
+          OS << "Arg::Type::I32, ";
+          break;
+        }
+        OS << ArgSelect.Value << "})";
       }
-      OS << "}))\n";
+      OS << ")\n";
     }
   }
   OS << "\n";

>From f0f09ce88398299afb5f074f848f48512ed7d2a0 Mon Sep 17 00:00:00 2001
From: Adam Yang <31109344+adam-yang at users.noreply.github.com>
Date: Mon, 21 Oct 2024 11:17:01 -0700
Subject: [PATCH 11/24] Rename and clang-format

---
 llvm/lib/Target/DirectX/DXILOpLowering.cpp | 22 +++++++++++-----------
 llvm/utils/TableGen/DXILEmitter.cpp        |  8 ++++----
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index d2d0f11b756038..fc23be5124d142 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -106,7 +106,7 @@ class OpLowerer {
     return false;
   }
 
-  struct Arg {
+  struct ArgSelect {
     enum class Type {
       Index,
       I8,
@@ -117,25 +117,25 @@ class OpLowerer {
   };
 
   [[nodiscard]] bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
-                                           ArrayRef<Arg> Args) {
+                                           ArrayRef<ArgSelect> Args) {
     bool IsVectorArgExpansion = isVectorArgExpansion(F);
     return replaceFunction(F, [&](CallInst *CI) -> Error {
       OpBuilder.getIRB().SetInsertPoint(CI);
       SmallVector<Value *> NewArgs;
       if (Args.size()) {
-        for (const Arg &A : Args) {
+        for (const ArgSelect &A : Args) {
           switch (A.Type) {
-          case Arg::Type::Index:
+          case ArgSelect::Type::Index:
             NewArgs.push_back(CI->getArgOperand(A.Value));
             break;
-          case Arg::Type::I8:
+          case ArgSelect::Type::I8:
             NewArgs.push_back(OpBuilder.getIRB().getInt8((uint8_t)A.Value));
             break;
-          case Arg::Type::I32:
+          case ArgSelect::Type::I32:
             NewArgs.push_back(OpBuilder.getIRB().getInt32(A.Value));
             break;
           default:
-            llvm_unreachable("Invalid type of intrinsic arg.");
+            llvm_unreachable("Invalid type of intrinsic arg select.");
           }
         }
       } else if (IsVectorArgExpansion) {
@@ -667,10 +667,10 @@ class OpLowerer {
       switch (ID) {
       default:
         continue;
-#define DXIL_OP_INTRINSIC(OpCode, Intrin, ...)                                   \
-  case Intrin:                                                                   \
-    HasErrors |= replaceFunctionWithOp(F, OpCode,                                \
-                                       ArrayRef<Arg>{ __VA_ARGS__ });            \
+#define DXIL_OP_INTRINSIC(OpCode, Intrin, ...)                                 \
+  case Intrin:                                                                 \
+    HasErrors |= replaceFunctionWithOp(F, OpCode,                              \
+                                       ArrayRef<ArgSelect>{ __VA_ARGS__ });    \
     break;
 #include "DXILOperation.inc"
       case Intrinsic::dx_handle_fromBinding:
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 06f5f89615c6f3..540af4869787ac 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -451,16 +451,16 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
       OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName
          << ", Intrinsic::" << MappedIntr.Intrinsic;
       for (const DXILArgSelect &ArgSelect : MappedIntr.Args) {
-        OS << ", (Arg { ";
+        OS << ", (ArgSelect { ";
         switch (ArgSelect.Type) {
         case DXILArgSelect::Type::Index:
-          OS << "Arg::Type::Index, ";
+          OS << "ArgSelect::Type::Index, ";
           break;
         case DXILArgSelect::Type::I8:
-          OS << "Arg::Type::I8, ";
+          OS << "ArgSelect::Type::I8, ";
           break;
         case DXILArgSelect::Type::I32:
-          OS << "Arg::Type::I32, ";
+          OS << "ArgSelect::Type::I32, ";
           break;
         }
         OS << ArgSelect.Value << "})";

>From e4f8af7a0145e4b2a5a6eb341d736780898641ec Mon Sep 17 00:00:00 2001
From: Adam Yang <31109344+adam-yang at users.noreply.github.com>
Date: Mon, 21 Oct 2024 11:21:25 -0700
Subject: [PATCH 12/24] Comment

---
 llvm/lib/Target/DirectX/DXIL.td | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 1175b7a7aa8846..1930aea22c7759 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -345,6 +345,7 @@ class DXILOp<int opcode, DXILOpClass opclass> {
   // LLVM Intrinsic DXIL Operation maps to
   Intrinsic LLVMIntrinsic = ?;
 
+  // Non-trivial LLVM Intrinsics DXIL Operation maps to
   list<IntrinsicSelect> intrinsic_selects = [];
 
   // Result type of the op

>From 81d928ac9b28879ba4c9c5cc2f2a20a4833f2d73 Mon Sep 17 00:00:00 2001
From: Adam Yang <31109344+adam-yang at users.noreply.github.com>
Date: Mon, 21 Oct 2024 12:21:10 -0700
Subject: [PATCH 13/24] Handling the arg select version for other ops

---
 llvm/lib/Target/DirectX/DXIL.td     |  2 +-
 llvm/utils/TableGen/DXILEmitter.cpp | 12 +++++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 1930aea22c7759..09e13ed2472357 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -326,7 +326,7 @@ class ArgI8<DXILConstant value_> : Arg {
   let is_i8 = 1;
 }
 
-class IntrinsicSelect<Intrinsic intrinsic_, list<Arg> args_=?> {
+class IntrinsicSelect<Intrinsic intrinsic_, list<Arg> args_> {
   Intrinsic intrinsic = intrinsic_;
   list<Arg> args = args_;
 }
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 540af4869787ac..d68d19320c803a 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -208,13 +208,23 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
           bool IsI8 = Arg->getValueAsBit("is_i8");
           bool IsI32 = Arg->getValueAsBit("is_i32");
           int Index = Arg->getValueAsInt("index");
-          const Record *ValueRec = Arg->getValueAsDef("value");
+          const Record *ValueRec = Arg->getValueAsOptionalDef("value");
 
           DXILArgSelect ArgSelect;
           if (IsI8) {
+            if (!ValueRec) {
+              PrintFatalError(R, Twine("'value' must be defined for i8 "
+                                       "ArgSelect for DXIL operation - ") +
+                                     OpName);
+            }
             ArgSelect.Type = DXILArgSelect::Type::I8;
             ArgSelect.Value = ValueRec->getValueAsInt("value");
           } else if (IsI32) {
+            if (!ValueRec) {
+              PrintFatalError(R, Twine("'value' must be defined for i32 "
+                                       "ArgSelect for DXIL operation - ") +
+                                     OpName);
+            }
             ArgSelect.Type = DXILArgSelect::Type::I32;
             ArgSelect.Value = ValueRec->getValueAsInt("value");
           } else {

>From e2f1c628c33ebc2554ec694a7f900825a04822a1 Mon Sep 17 00:00:00 2001
From: Adam Yang <31109344+adam-yang at users.noreply.github.com>
Date: Mon, 21 Oct 2024 12:43:53 -0700
Subject: [PATCH 14/24] Formatting

---
 llvm/lib/Target/DirectX/DXILOpLowering.cpp | 4 ++--
 llvm/utils/TableGen/DXILEmitter.cpp        | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index fc23be5124d142..a854b69c238b3b 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -669,8 +669,8 @@ class OpLowerer {
         continue;
 #define DXIL_OP_INTRINSIC(OpCode, Intrin, ...)                                 \
   case Intrin:                                                                 \
-    HasErrors |= replaceFunctionWithOp(F, OpCode,                              \
-                                       ArrayRef<ArgSelect>{ __VA_ARGS__ });    \
+    HasErrors |=                                                               \
+        replaceFunctionWithOp(F, OpCode, ArrayRef<ArgSelect>{__VA_ARGS__});    \
     break;
 #include "DXILOperation.inc"
       case Intrinsic::dx_handle_fromBinding:
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index d68d19320c803a..011a5cad186492 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -84,9 +84,9 @@ static void ascendingSortByVersion(std::vector<const Record *> &Recs) {
   });
 }
 
-/// Take a `int_{intrinsic_name}` and return just the intrinsic_name part if available.
-/// Otherwise return the empty string.
-static StringRef GetIntrinsicName(const RecordVal *RV){
+/// Take a `int_{intrinsic_name}` and return just the intrinsic_name part if
+/// available. Otherwise return the empty string.
+static StringRef GetIntrinsicName(const RecordVal *RV) {
   if (RV && RV->getValue()) {
     if (const DefInit *DI = dyn_cast<DefInit>(RV->getValue())) {
       auto *IntrinsicDef = DI->getDef();

>From f1509f7940985ce318eead4449c8a8171a9cd564 Mon Sep 17 00:00:00 2001
From: Adam Yang <31109344+adam-yang at users.noreply.github.com>
Date: Mon, 28 Oct 2024 14:39:41 -0700
Subject: [PATCH 15/24] Addressed feedback

---
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    | 22 +++++++++----------
 .../group_memory_barrier_with_group_sync.ll   |  2 +-
 llvm/utils/TableGen/DXILEmitter.cpp           |  2 +-
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index a854b69c238b3b..21065dcf88afea 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -117,35 +117,35 @@ class OpLowerer {
   };
 
   [[nodiscard]] bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
-                                           ArrayRef<ArgSelect> Args) {
+                                           ArrayRef<ArgSelect> ArgSelects) {
     bool IsVectorArgExpansion = isVectorArgExpansion(F);
     return replaceFunction(F, [&](CallInst *CI) -> Error {
       OpBuilder.getIRB().SetInsertPoint(CI);
-      SmallVector<Value *> NewArgs;
-      if (Args.size()) {
-        for (const ArgSelect &A : Args) {
+      SmallVector<Value *> Args;
+      if (ArgSelects.size()) {
+        for (const ArgSelect &A : ArgSelects) {
           switch (A.Type) {
           case ArgSelect::Type::Index:
-            NewArgs.push_back(CI->getArgOperand(A.Value));
+            Args.push_back(CI->getArgOperand(A.Value));
             break;
           case ArgSelect::Type::I8:
-            NewArgs.push_back(OpBuilder.getIRB().getInt8((uint8_t)A.Value));
+            Args.push_back(OpBuilder.getIRB().getInt8((uint8_t)A.Value));
             break;
           case ArgSelect::Type::I32:
-            NewArgs.push_back(OpBuilder.getIRB().getInt32(A.Value));
+            Args.push_back(OpBuilder.getIRB().getInt32(A.Value));
             break;
           default:
             llvm_unreachable("Invalid type of intrinsic arg select.");
           }
         }
       } else if (IsVectorArgExpansion) {
-        NewArgs = argVectorFlatten(CI, OpBuilder.getIRB());
+        Args = argVectorFlatten(CI, OpBuilder.getIRB());
       } else {
-        NewArgs.append(CI->arg_begin(), CI->arg_end());
+        Args.append(CI->arg_begin(), CI->arg_end());
       }
 
-      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
-          DXILOp, NewArgs, CI->getName(), F.getReturnType());
+      Expected<CallInst *> OpCall =
+          OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), F.getReturnType());
       if (Error E = OpCall.takeError())
         return E;
 
diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
index cd588d464e302b..baf93d4e177f0f 100644
--- a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s
 
 define void @test_group_memory_barrier_with_group_sync() {
 entry:
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 011a5cad186492..8bebe608eece47 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -39,7 +39,7 @@ struct DXILArgSelect {
     I8,
   };
   Type Type = Type::Index;
-  int Value = 0;
+  int Value = -1;
 };
 struct DXILIntrinsicSelect {
   StringRef Intrinsic;

>From fc71b61b70664b7505a8a9c929b306229ec87cb4 Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Wed, 30 Oct 2024 20:47:05 -0700
Subject: [PATCH 16/24] Fixed the warnings on clang

---
 llvm/lib/Target/DirectX/DXILOpLowering.cpp | 2 --
 llvm/utils/TableGen/DXILEmitter.cpp        | 6 +++---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 21065dcf88afea..a8ee45334e327e 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -134,8 +134,6 @@ class OpLowerer {
           case ArgSelect::Type::I32:
             Args.push_back(OpBuilder.getIRB().getInt32(A.Value));
             break;
-          default:
-            llvm_unreachable("Invalid type of intrinsic arg select.");
           }
         }
       } else if (IsVectorArgExpansion) {
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 8bebe608eece47..4656438d64c80a 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -459,9 +459,9 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
     }
     for (const DXILIntrinsicSelect &MappedIntr : Op.IntrinsicSelects) {
       OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName
-         << ", Intrinsic::" << MappedIntr.Intrinsic;
+         << ", Intrinsic::" << MappedIntr.Intrinsic << ", ";
       for (const DXILArgSelect &ArgSelect : MappedIntr.Args) {
-        OS << ", (ArgSelect { ";
+        OS << "(ArgSelect { ";
         switch (ArgSelect.Type) {
         case DXILArgSelect::Type::Index:
           OS << "ArgSelect::Type::Index, ";
@@ -473,7 +473,7 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
           OS << "ArgSelect::Type::I32, ";
           break;
         }
-        OS << ArgSelect.Value << "})";
+        OS << ArgSelect.Value << "}), ";
       }
       OS << ")\n";
     }

>From 065fc8e518581559b366177e41ac0520653d4062 Mon Sep 17 00:00:00 2001
From: Adam Yang <31109344+adam-yang at users.noreply.github.com>
Date: Wed, 6 Nov 2024 13:19:27 -0800
Subject: [PATCH 17/24] Got rid of DXILConstant in the tablegen

---
 llvm/lib/Target/DirectX/DXIL.td     | 24 +++++++++---------------
 llvm/utils/TableGen/DXILEmitter.cpp | 16 +++-------------
 2 files changed, 12 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 09e13ed2472357..00c0e78db912d8 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -294,34 +294,28 @@ class Attributes<Version ver = DXIL1_0, list<DXILAttribute> attrs> {
   list<DXILAttribute> op_attrs = attrs;
 }
 
-class DXILConstant<int value_> {
-  int value = value_;
-}
-
-defset list<DXILConstant> BarrierModes = {
-  def BarrierMode_DeviceMemoryBarrier              : DXILConstant<2>;
-  def BarrierMode_DeviceMemoryBarrierWithGroupSync : DXILConstant<3>;
-  def BarrierMode_GroupMemoryBarrier               : DXILConstant<8>;
-  def BarrierMode_GroupMemoryBarrierWithGroupSync  : DXILConstant<9>;
-  def BarrierMode_AllMemoryBarrier                 : DXILConstant<10>;
-  def BarrierMode_AllMemoryBarrierWithGroupSync    : DXILConstant<11>;
-}
+defvar BarrierMode_DeviceMemoryBarrier              = 2;
+defvar BarrierMode_DeviceMemoryBarrierWithGroupSync = 3;
+defvar BarrierMode_GroupMemoryBarrier               = 8;
+defvar BarrierMode_GroupMemoryBarrierWithGroupSync  = 9;
+defvar BarrierMode_AllMemoryBarrier                 = 10;
+defvar BarrierMode_AllMemoryBarrierWithGroupSync    = 11;
 
 // Intrinsic arg selection
 class Arg {
   int index = -1;
-  DXILConstant value;
+  int value = 0;
   bit is_i8 = 0;
   bit is_i32 = 0;
 }
 class ArgSelect<int index_> : Arg {
   let index = index_;
 }
-class ArgI32<DXILConstant value_> : Arg {
+class ArgI32<int value_> : Arg {
   let value = value_;
   let is_i32 = 1;
 }
-class ArgI8<DXILConstant value_> : Arg {
+class ArgI8<int value_> : Arg {
   let value = value_;
   let is_i8 = 1;
 }
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 4656438d64c80a..b75696db410525 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -208,25 +208,15 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
           bool IsI8 = Arg->getValueAsBit("is_i8");
           bool IsI32 = Arg->getValueAsBit("is_i32");
           int Index = Arg->getValueAsInt("index");
-          const Record *ValueRec = Arg->getValueAsOptionalDef("value");
+          int Value = Arg->getValueAsInt("value");
 
           DXILArgSelect ArgSelect;
           if (IsI8) {
-            if (!ValueRec) {
-              PrintFatalError(R, Twine("'value' must be defined for i8 "
-                                       "ArgSelect for DXIL operation - ") +
-                                     OpName);
-            }
             ArgSelect.Type = DXILArgSelect::Type::I8;
-            ArgSelect.Value = ValueRec->getValueAsInt("value");
+            ArgSelect.Value = Value;
           } else if (IsI32) {
-            if (!ValueRec) {
-              PrintFatalError(R, Twine("'value' must be defined for i32 "
-                                       "ArgSelect for DXIL operation - ") +
-                                     OpName);
-            }
             ArgSelect.Type = DXILArgSelect::Type::I32;
-            ArgSelect.Value = ValueRec->getValueAsInt("value");
+            ArgSelect.Value = Value;
           } else {
             if (Index < 0) {
               PrintFatalError(

>From 6cca5c9923af4549a78b2a7fa4d1e66fd2c2eb1d Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Tue, 12 Nov 2024 16:42:18 -0800
Subject: [PATCH 18/24] Renamed everything, no longer duplicating the structs
 three times

---
 llvm/lib/Target/DirectX/DXIL.td            | 38 +++++-----
 llvm/lib/Target/DirectX/DXILOpLowering.cpp | 23 ++----
 llvm/utils/TableGen/DXILEmitter.cpp        | 86 +++++++++-------------
 3 files changed, 61 insertions(+), 86 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 00c0e78db912d8..8514ef86f73b8b 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -302,28 +302,24 @@ defvar BarrierMode_AllMemoryBarrier                 = 10;
 defvar BarrierMode_AllMemoryBarrierWithGroupSync    = 11;
 
 // Intrinsic arg selection
-class Arg {
-  int index = -1;
-  int value = 0;
-  bit is_i8 = 0;
-  bit is_i32 = 0;
-}
-class ArgSelect<int index_> : Arg {
-  let index = index_;
-}
-class ArgI32<int value_> : Arg {
-  let value = value_;
-  let is_i32 = 1;
-}
-class ArgI8<int value_> : Arg {
-  let value = value_;
-  let is_i8 = 1;
+class IntrinArgSelectType;
+def IntrinArgSelect_Index : IntrinArgSelectType;
+def IntrinArgSelect_I8    : IntrinArgSelectType;
+def IntrinArgSelect_I32   : IntrinArgSelectType;
+
+class IntrinArgSelect<IntrinArgSelectType type_, int value_> {
+  IntrinArgSelectType type = type_;
+  int value = value_;
 }
+class IntrinArgIndex<int index> : IntrinArgSelect<IntrinArgSelect_Index, index>;
+class IntrinArgI8   <int value> : IntrinArgSelect<IntrinArgSelect_I8,    value>;
+class IntrinArgI32  <int value> : IntrinArgSelect<IntrinArgSelect_I32,   value>;
 
-class IntrinsicSelect<Intrinsic intrinsic_, list<Arg> args_> {
+class IntrinWithArgs<Intrinsic intrinsic_, list<IntrinArgSelect> arg_selects_> {
   Intrinsic intrinsic = intrinsic_;
-  list<Arg> args = args_;
+  list<IntrinArgSelect> arg_selects = arg_selects_;
 }
+class Intrin<Intrinsic intrinsic_> : IntrinWithArgs<intrinsic_, []> {}
 
 // Abstraction DXIL Operation
 class DXILOp<int opcode, DXILOpClass opclass> {
@@ -340,7 +336,7 @@ class DXILOp<int opcode, DXILOpClass opclass> {
   Intrinsic LLVMIntrinsic = ?;
 
   // Non-trivial LLVM Intrinsics DXIL Operation maps to
-  list<IntrinsicSelect> intrinsic_selects = [];
+  list<IntrinWithArgs> intrinsic_selects = [];
 
   // Result type of the op
   DXILOpParamType result;
@@ -951,9 +947,9 @@ def WaveAllBitCount : DXILOp<135, waveAllOp> {
 def Barrier : DXILOp<80, barrier> {
   let Doc = "inserts a memory barrier in the shader";
   let intrinsic_selects = [
-    IntrinsicSelect<
+    IntrinWithArgs<
         int_dx_group_memory_barrier_with_group_sync,
-        [ ArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync> ]>,
+        [ IntrinArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync> ]>,
   ];
 
   let arguments = [Int32Ty];
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index a8ee45334e327e..fc162b71c0786d 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -106,32 +106,25 @@ class OpLowerer {
     return false;
   }
 
-  struct ArgSelect {
-    enum class Type {
-      Index,
-      I8,
-      I32,
-    };
-    Type Type = Type::Index;
-    int Value = -1;
-  };
+#define DXIL_OP_INTRINSIC_ARG_SELECT_TYPES
+#include "DXILOperation.inc"
 
   [[nodiscard]] bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
-                                           ArrayRef<ArgSelect> ArgSelects) {
+                                           ArrayRef<IntrinArgSelect> ArgSelects) {
     bool IsVectorArgExpansion = isVectorArgExpansion(F);
     return replaceFunction(F, [&](CallInst *CI) -> Error {
       OpBuilder.getIRB().SetInsertPoint(CI);
       SmallVector<Value *> Args;
       if (ArgSelects.size()) {
-        for (const ArgSelect &A : ArgSelects) {
+        for (const IntrinArgSelect &A : ArgSelects) {
           switch (A.Type) {
-          case ArgSelect::Type::Index:
+          case IntrinArgSelect::Type::Index:
             Args.push_back(CI->getArgOperand(A.Value));
             break;
-          case ArgSelect::Type::I8:
+          case IntrinArgSelect::Type::I8:
             Args.push_back(OpBuilder.getIRB().getInt8((uint8_t)A.Value));
             break;
-          case ArgSelect::Type::I32:
+          case IntrinArgSelect::Type::I32:
             Args.push_back(OpBuilder.getIRB().getInt32(A.Value));
             break;
           }
@@ -668,7 +661,7 @@ class OpLowerer {
 #define DXIL_OP_INTRINSIC(OpCode, Intrin, ...)                                 \
   case Intrin:                                                                 \
     HasErrors |=                                                               \
-        replaceFunctionWithOp(F, OpCode, ArrayRef<ArgSelect>{__VA_ARGS__});    \
+        replaceFunctionWithOp(F, OpCode, ArrayRef<IntrinArgSelect>{__VA_ARGS__});    \
     break;
 #include "DXILOperation.inc"
       case Intrinsic::dx_handle_fromBinding:
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index b75696db410525..37ce442bdcdb85 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -32,20 +32,19 @@ using namespace llvm::dxil;
 
 namespace {
 
-struct DXILArgSelect {
-  enum class Type {
-    Index,
-    I32,
-    I8,
-  };
-  Type Type = Type::Index;
-  int Value = -1;
-};
 struct DXILIntrinsicSelect {
   StringRef Intrinsic;
-  SmallVector<DXILArgSelect, 4> Args;
+  SmallVector<const Record *> ArgSelectRecords;
 };
 
+static StringRef StripIntrinArgSelectTypePrefix(StringRef Type) {
+  StringRef Prefix = "IntrinArgSelect_";
+  if (!Type.starts_with(Prefix)) {
+    PrintFatalError("IntrinArgSelectType definintion must be prefixed with 'IntrinArgSelect_'");
+  }
+  return Type.substr(Prefix.size());
+}
+
 struct DXILOperationDesc {
   std::string OpName; // name of DXIL operation
   int OpCode;         // ID of DXIL operation
@@ -203,32 +202,9 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
       for (const Record *R : IntrinsicSelectRecords) {
         DXILIntrinsicSelect IntrSelect;
         IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("intrinsic"));
-        auto Args = R->getValueAsListOfDefs("args");
-        for (const Record *Arg : Args) {
-          bool IsI8 = Arg->getValueAsBit("is_i8");
-          bool IsI32 = Arg->getValueAsBit("is_i32");
-          int Index = Arg->getValueAsInt("index");
-          int Value = Arg->getValueAsInt("value");
-
-          DXILArgSelect ArgSelect;
-          if (IsI8) {
-            ArgSelect.Type = DXILArgSelect::Type::I8;
-            ArgSelect.Value = Value;
-          } else if (IsI32) {
-            ArgSelect.Type = DXILArgSelect::Type::I32;
-            ArgSelect.Value = Value;
-          } else {
-            if (Index < 0) {
-              PrintFatalError(
-                  R, Twine("Index in ArgSelect<index> must be equal to or "
-                           "greater than 0 for DXIL operation - ") +
-                         OpName);
-            }
-            ArgSelect.Type = DXILArgSelect::Type::Index;
-            ArgSelect.Value = Index;
-          }
-
-          IntrSelect.Args.emplace_back(std::move(ArgSelect));
+        auto Args = R->getValueAsListOfDefs("arg_selects");
+        for (const Record *ArgSelect : Args) {
+          IntrSelect.ArgSelectRecords.emplace_back(ArgSelect);
         }
         IntrinsicSelects.emplace_back(std::move(IntrSelect));
       }
@@ -441,6 +417,7 @@ static void emitDXILOpFunctionTypes(ArrayRef<DXILOperationDesc> Ops,
 /// \param Output stream
 static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
                                  raw_ostream &OS) {
+
   OS << "#ifdef DXIL_OP_INTRINSIC\n";
   OS << "\n";
   for (const auto &Op : Ops) {
@@ -450,20 +427,12 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
     for (const DXILIntrinsicSelect &MappedIntr : Op.IntrinsicSelects) {
       OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName
          << ", Intrinsic::" << MappedIntr.Intrinsic << ", ";
-      for (const DXILArgSelect &ArgSelect : MappedIntr.Args) {
-        OS << "(ArgSelect { ";
-        switch (ArgSelect.Type) {
-        case DXILArgSelect::Type::Index:
-          OS << "ArgSelect::Type::Index, ";
-          break;
-        case DXILArgSelect::Type::I8:
-          OS << "ArgSelect::Type::I8, ";
-          break;
-        case DXILArgSelect::Type::I32:
-          OS << "ArgSelect::Type::I32, ";
-          break;
-        }
-        OS << ArgSelect.Value << "}), ";
+      for (const Record *ArgSelect : MappedIntr.ArgSelectRecords) {
+        std::string Type = ArgSelect->getValueAsDef("type")->getNameInitAsString();
+        int Value = ArgSelect->getValueAsInt("value");
+        OS << "(IntrinArgSelect{"
+          << "IntrinArgSelect::Type::" << StripIntrinArgSelectTypePrefix(Type) << ","
+          << Value << "}), ";
       }
       OS << ")\n";
     }
@@ -473,6 +442,22 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
   OS << "#endif\n\n";
 }
 
+static void emitDXILIntrinsicArgSelectTypes(const RecordKeeper &Records, raw_ostream &OS) {
+  OS << "#ifdef DXIL_OP_INTRINSIC_ARG_SELECT_TYPES\n";
+  OS << "struct IntrinArgSelect {\n";
+  OS << "  enum class Type {\n";
+  for (const Record *Records : Records.getAllDerivedDefinitions("IntrinArgSelectType")) {
+    StringRef StrippedName = StripIntrinArgSelectTypePrefix(Records->getName());
+    OS << "    " << StrippedName << ",\n";
+  }
+  OS << "  };\n";
+  OS << "  Type Type;\n";
+  OS << "  int Value;\n";
+  OS << "};\n";
+  OS << "#undef DXIL_OP_INTRINSIC_ARG_SELECT_TYPES\n";
+  OS << "#endif\n\n";
+}
+
 /// Emit DXIL operation table
 /// \param A vector of DXIL Ops
 /// \param Output stream
@@ -613,6 +598,7 @@ static void emitDxilOperation(const RecordKeeper &Records, raw_ostream &OS) {
   emitDXILOpClasses(Records, OS);
   emitDXILOpParamTypes(Records, OS);
   emitDXILOpFunctionTypes(DXILOps, OS);
+  emitDXILIntrinsicArgSelectTypes(Records, OS);
   emitDXILIntrinsicMap(DXILOps, OS);
   OS << "#ifdef DXIL_OP_OPERATION_TABLE\n\n";
   emitDXILOperationTableDataStructs(Records, OS);

>From f264179acb82771e30eab1453e58e5ca411aacc2 Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Tue, 12 Nov 2024 17:05:33 -0800
Subject: [PATCH 19/24] Got rid of the default LLVMIntrinsic in DXILOp

---
 llvm/lib/Target/DirectX/DXIL.td     | 115 ++++++++++++++++------------
 llvm/utils/TableGen/DXILEmitter.cpp |   7 --
 2 files changed, 68 insertions(+), 54 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 8514ef86f73b8b..ffdec9dfc2698f 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -315,11 +315,35 @@ class IntrinArgIndex<int index> : IntrinArgSelect<IntrinArgSelect_Index, index>;
 class IntrinArgI8   <int value> : IntrinArgSelect<IntrinArgSelect_I8,    value>;
 class IntrinArgI32  <int value> : IntrinArgSelect<IntrinArgSelect_I32,   value>;
 
-class IntrinWithArgs<Intrinsic intrinsic_, list<IntrinArgSelect> arg_selects_> {
+// Select which intrinsic to lower from for a DXILOp.
+// If the intrinsic is the only argument given to IntrinSelect, then the arguments of the intrinsic will be copied
+// in the same order. Example:
+//   let intrinsic_selects = [
+//     IntrinSelect<int_dx_my_intrinsic>,
+//     IntrinSelect<int_dx_my_intrinsic2>,
+//   ]
+//=========================================================================================
+// Using IntrinArgIndex<>, arguments of the intrinsic can be copied in specific order:
+//   let intrinsic_selects = [
+//     IntrinSelect<int_dx_my_intrinsic,
+//       [IntrinArgIndex<2>, IntrinArgIndex<1>, IntrinArgIndex<0>> ]
+//     >,
+//   ]
+//=========================================================================================
+// to the dxil op. This can be used in conjunction with IntrinArgIndex:
+//   let intrinsic_selects = [
+//     IntrinSelect<int_dx_wave_active_usum,
+//       [ IntrinArgIndex<0>, IntrinArgI8<0>, IntrinArgI8<1> ]
+//     >,
+//     IntrinSelect<int_dx_wave_active_sum,
+//       [ IntrinArgIndex<0>, IntrinArgI8<0>, IntrinArgI8<0> ]
+//     >,
+//   ]
+//
+class IntrinSelect<Intrinsic intrinsic_, list<IntrinArgSelect> arg_selects_=[]> {
   Intrinsic intrinsic = intrinsic_;
   list<IntrinArgSelect> arg_selects = arg_selects_;
 }
-class Intrin<Intrinsic intrinsic_> : IntrinWithArgs<intrinsic_, []> {}
 
 // Abstraction DXIL Operation
 class DXILOp<int opcode, DXILOpClass opclass> {
@@ -332,10 +356,7 @@ class DXILOp<int opcode, DXILOpClass opclass> {
   // Class of DXIL Operation.
   DXILOpClass OpClass = opclass;
 
-  // LLVM Intrinsic DXIL Operation maps to
-  Intrinsic LLVMIntrinsic = ?;
-
-  // Non-trivial LLVM Intrinsics DXIL Operation maps to
+  // LLVM Intrinsics DXIL Operation maps from
   list<IntrinWithArgs> intrinsic_selects = [];
 
   // Result type of the op
@@ -358,7 +379,7 @@ class DXILOp<int opcode, DXILOpClass opclass> {
 
 def Abs :  DXILOp<6, unary> {
   let Doc = "Returns the absolute value of the input.";
-  let LLVMIntrinsic = int_fabs;
+  let intrinsic_selects = [ IntrinSelect<int_fabs> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy]>];
@@ -368,7 +389,7 @@ def Abs :  DXILOp<6, unary> {
 
 def Saturate :  DXILOp<7, unary> {
   let Doc = "Clamps a single or double precision floating point value to [0.0f...1.0f].";
-  let LLVMIntrinsic = int_dx_saturate;
+  let intrinsic_selects = [ IntrinSelect<int_dx_saturate> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy]>];
@@ -378,7 +399,7 @@ def Saturate :  DXILOp<7, unary> {
 
 def IsInf :  DXILOp<9, isSpecialFloat> {
   let Doc = "Determines if the specified value is infinite.";
-  let LLVMIntrinsic = int_dx_isinf;
+  let intrinsic_selects = [ IntrinSelect<int_dx_isinf> ];
   let arguments = [OverloadTy];
   let result = Int1Ty;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -388,7 +409,7 @@ def IsInf :  DXILOp<9, isSpecialFloat> {
 
 def Cos :  DXILOp<12, unary> {
   let Doc = "Returns cosine(theta) for theta in radians.";
-  let LLVMIntrinsic = int_cos;
+  let intrinsic_selects = [ IntrinSelect<int_cos> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -398,7 +419,7 @@ def Cos :  DXILOp<12, unary> {
 
 def Sin :  DXILOp<13, unary> {
   let Doc = "Returns sine(theta) for theta in radians.";
-  let LLVMIntrinsic = int_sin;
+  let intrinsic_selects = [ IntrinSelect<int_sin> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -408,7 +429,7 @@ def Sin :  DXILOp<13, unary> {
 
 def Tan :  DXILOp<14, unary> {
   let Doc = "Returns tangent(theta) for theta in radians.";
-  let LLVMIntrinsic = int_tan;
+  let intrinsic_selects = [ IntrinSelect<int_tan> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -418,7 +439,7 @@ def Tan :  DXILOp<14, unary> {
 
 def ACos :  DXILOp<15, unary> {
   let Doc = "Returns the arccosine of the specified value.";
-  let LLVMIntrinsic = int_acos;
+  let intrinsic_selects = [ IntrinSelect<int_acos> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -428,7 +449,7 @@ def ACos :  DXILOp<15, unary> {
 
 def ASin :  DXILOp<16, unary> {
   let Doc = "Returns the arcsine of the specified value.";
-  let LLVMIntrinsic = int_asin;
+  let intrinsic_selects = [ IntrinSelect<int_asin> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -438,7 +459,7 @@ def ASin :  DXILOp<16, unary> {
 
 def ATan :  DXILOp<17, unary> {
   let Doc = "Returns the arctangent of the specified value.";
-  let LLVMIntrinsic = int_atan;
+  let intrinsic_selects = [ IntrinSelect<int_atan> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -448,7 +469,7 @@ def ATan :  DXILOp<17, unary> {
 
 def HCos :  DXILOp<18, unary> {
   let Doc = "Returns the hyperbolic cosine of the specified value.";
-  let LLVMIntrinsic = int_cosh;
+  let intrinsic_selects = [ IntrinSelect<int_cosh> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -458,7 +479,7 @@ def HCos :  DXILOp<18, unary> {
 
 def HSin :  DXILOp<19, unary> {
   let Doc = "Returns the hyperbolic sine of the specified value.";
-  let LLVMIntrinsic = int_sinh;
+  let intrinsic_selects = [ IntrinSelect<int_sinh> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -468,7 +489,7 @@ def HSin :  DXILOp<19, unary> {
 
 def HTan :  DXILOp<20, unary> {
   let Doc = "Returns the hyperbolic tan of the specified value.";
-  let LLVMIntrinsic = int_tanh;
+  let intrinsic_selects = [ IntrinSelect<int_tanh> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -479,7 +500,7 @@ def HTan :  DXILOp<20, unary> {
 def Exp2 :  DXILOp<21, unary> {
   let Doc = "Returns the base 2 exponential, or 2**x, of the specified value. "
             "exp2(x) = 2**x.";
-  let LLVMIntrinsic = int_exp2;
+  let intrinsic_selects = [ IntrinSelect<int_exp2> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -490,7 +511,7 @@ def Exp2 :  DXILOp<21, unary> {
 def Frac :  DXILOp<22, unary> {
   let Doc = "Returns a fraction from 0 to 1 that represents the decimal part "
             "of the input.";
-  let LLVMIntrinsic = int_dx_frac;
+  let intrinsic_selects = [ IntrinSelect<int_dx_frac> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -500,7 +521,7 @@ def Frac :  DXILOp<22, unary> {
 
 def Log2 :  DXILOp<23, unary> {
   let Doc = "Returns the base-2 logarithm of the specified value.";
-  let LLVMIntrinsic = int_log2;
+  let intrinsic_selects = [ IntrinSelect<int_log2> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -511,7 +532,7 @@ def Log2 :  DXILOp<23, unary> {
 def Sqrt :  DXILOp<24, unary> {
   let Doc = "Returns the square root of the specified floating-point value, "
             "per component.";
-  let LLVMIntrinsic = int_sqrt;
+  let intrinsic_selects = [ IntrinSelect<int_sqrt> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -522,7 +543,7 @@ def Sqrt :  DXILOp<24, unary> {
 def RSqrt :  DXILOp<25, unary> {
   let Doc = "Returns the reciprocal of the square root of the specified value. "
             "rsqrt(x) = 1 / sqrt(x).";
-  let LLVMIntrinsic = int_dx_rsqrt;
+  let intrinsic_selects = [ IntrinSelect<int_dx_rsqrt> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -533,7 +554,7 @@ def RSqrt :  DXILOp<25, unary> {
 def Round :  DXILOp<26, unary> {
   let Doc = "Returns the input rounded to the nearest integer within a "
             "floating-point type.";
-  let LLVMIntrinsic = int_roundeven;
+  let intrinsic_selects = [ IntrinSelect<int_roundeven> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -544,7 +565,7 @@ def Round :  DXILOp<26, unary> {
 def Floor :  DXILOp<27, unary> {
   let Doc =
       "Returns the largest integer that is less than or equal to the input.";
-  let LLVMIntrinsic = int_floor;
+  let intrinsic_selects = [ IntrinSelect<int_floor> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -555,7 +576,7 @@ def Floor :  DXILOp<27, unary> {
 def Ceil :  DXILOp<28, unary> {
   let Doc = "Returns the smallest integer that is greater than or equal to the "
             "input.";
-  let LLVMIntrinsic = int_ceil;
+  let intrinsic_selects = [ IntrinSelect<int_ceil> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -565,7 +586,7 @@ def Ceil :  DXILOp<28, unary> {
 
 def Trunc :  DXILOp<29, unary> {
   let Doc = "Returns the specified value truncated to the integer component.";
-  let LLVMIntrinsic = int_trunc;
+  let intrinsic_selects = [ IntrinSelect<int_trunc> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -575,7 +596,7 @@ def Trunc :  DXILOp<29, unary> {
 
 def Rbits :  DXILOp<30, unary> {
   let Doc = "Returns the specified value with its bits reversed.";
-  let LLVMIntrinsic = int_bitreverse;
+  let intrinsic_selects = [ IntrinSelect<int_bitreverse> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -620,7 +641,7 @@ def FirstbitSHi :  DXILOp<34, unaryBits> {
 
 def FMax :  DXILOp<35, binary> {
   let Doc = "Float maximum. FMax(a,b) = a > b ? a : b";
-  let LLVMIntrinsic = int_maxnum;
+  let intrinsic_selects = [ IntrinSelect<int_maxnum> ];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -631,7 +652,7 @@ def FMax :  DXILOp<35, binary> {
 
 def FMin :  DXILOp<36, binary> {
   let Doc = "Float minimum. FMin(a,b) = a < b ? a : b";
-  let LLVMIntrinsic = int_minnum;
+  let intrinsic_selects = [ IntrinSelect<int_minnum> ];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -642,7 +663,7 @@ def FMin :  DXILOp<36, binary> {
 
 def SMax :  DXILOp<37, binary> {
   let Doc = "Signed integer maximum. SMax(a,b) = a > b ? a : b";
-  let LLVMIntrinsic = int_smax;
+  let intrinsic_selects = [ IntrinSelect<int_smax> ];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -653,7 +674,7 @@ def SMax :  DXILOp<37, binary> {
 
 def SMin :  DXILOp<38, binary> {
   let Doc = "Signed integer minimum. SMin(a,b) = a < b ? a : b";
-  let LLVMIntrinsic = int_smin;
+  let intrinsic_selects = [ IntrinSelect<int_smin> ];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -664,7 +685,7 @@ def SMin :  DXILOp<38, binary> {
 
 def UMax :  DXILOp<39, binary> {
   let Doc = "Unsigned integer maximum. UMax(a,b) = a > b ? a : b";
-  let LLVMIntrinsic = int_umax;
+  let intrinsic_selects = [ IntrinSelect<int_umax> ];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -675,7 +696,7 @@ def UMax :  DXILOp<39, binary> {
 
 def UMin :  DXILOp<40, binary> {
   let Doc = "Unsigned integer minimum. UMin(a,b) = a < b ? a : b";
-  let LLVMIntrinsic = int_umin;
+  let intrinsic_selects = [ IntrinSelect<int_umin> ];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -687,7 +708,7 @@ def UMin :  DXILOp<40, binary> {
 def FMad :  DXILOp<46, tertiary> {
   let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m "
             "* a + b.";
-  let LLVMIntrinsic = int_fmuladd;
+  let intrinsic_selects = [ IntrinSelect<int_fmuladd> ];
   let arguments = [OverloadTy, OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -699,7 +720,7 @@ def FMad :  DXILOp<46, tertiary> {
 def IMad :  DXILOp<48, tertiary> {
   let Doc = "Signed integer arithmetic multiply/add operation. imad(m,a,b) = m "
             "* a + b.";
-  let LLVMIntrinsic = int_dx_imad;
+  let intrinsic_selects = [ IntrinSelect<int_dx_imad> ];
   let arguments = [OverloadTy, OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -711,7 +732,7 @@ def IMad :  DXILOp<48, tertiary> {
 def UMad :  DXILOp<49, tertiary> {
   let Doc = "Unsigned integer arithmetic multiply/add operation. umad(m,a, = m "
             "* a + b.";
-  let LLVMIntrinsic = int_dx_umad;
+  let intrinsic_selects = [ IntrinSelect<int_dx_umad> ];
   let arguments = [OverloadTy, OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -723,7 +744,7 @@ def UMad :  DXILOp<49, tertiary> {
 def Dot2 :  DXILOp<54, dot2> {
   let Doc = "dot product of two float vectors Dot(a,b) = a[0]*b[0] + ... + "
             "a[n]*b[n] where n is 0 to 1 inclusive";
-  let LLVMIntrinsic = int_dx_dot2;
+  let intrinsic_selects = [ IntrinSelect<int_dx_dot2> ];
   let arguments = !listsplat(OverloadTy, 4);
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -734,7 +755,7 @@ def Dot2 :  DXILOp<54, dot2> {
 def Dot3 :  DXILOp<55, dot3> {
   let Doc = "dot product of two float vectors Dot(a,b) = a[0]*b[0] + ... + "
             "a[n]*b[n] where n is 0 to 2 inclusive";
-  let LLVMIntrinsic = int_dx_dot3;
+  let intrinsic_selects = [ IntrinSelect<int_dx_dot3> ];
   let arguments = !listsplat(OverloadTy, 6);
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -745,7 +766,7 @@ def Dot3 :  DXILOp<55, dot3> {
 def Dot4 :  DXILOp<56, dot4> {
   let Doc = "dot product of two float vectors Dot(a,b) = a[0]*b[0] + ... + "
             "a[n]*b[n] where n is 0 to 3 inclusive";
-  let LLVMIntrinsic = int_dx_dot4;
+  let intrinsic_selects = [ IntrinSelect<int_dx_dot4> ];
   let arguments = !listsplat(OverloadTy, 8);
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -810,7 +831,7 @@ def Discard : DXILOp<82, discard> {
 
 def ThreadId :  DXILOp<93, threadId> {
   let Doc = "Reads the thread ID";
-  let LLVMIntrinsic = int_dx_thread_id;
+  let intrinsic_selects = [ IntrinSelect<int_dx_thread_id> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
@@ -820,7 +841,7 @@ def ThreadId :  DXILOp<93, threadId> {
 
 def GroupId :  DXILOp<94, groupId> {
   let Doc = "Reads the group ID (SV_GroupID)";
-  let LLVMIntrinsic = int_dx_group_id;
+  let intrinsic_selects = [ IntrinSelect<int_dx_group_id> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
@@ -830,7 +851,7 @@ def GroupId :  DXILOp<94, groupId> {
 
 def ThreadIdInGroup :  DXILOp<95, threadIdInGroup> {
   let Doc = "Reads the thread ID within the group  (SV_GroupThreadID)";
-  let LLVMIntrinsic = int_dx_thread_id_in_group;
+  let intrinsic_selects = [ IntrinSelect<int_dx_thread_id_in_group> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
@@ -841,7 +862,7 @@ def ThreadIdInGroup :  DXILOp<95, threadIdInGroup> {
 def FlattenedThreadIdInGroup :  DXILOp<96, flattenedThreadIdInGroup> {
   let Doc = "Provides a flattened index for a given thread within a given "
             "group (SV_GroupIndex)";
-  let LLVMIntrinsic = int_dx_flattened_thread_id_in_group;
+  let intrinsic_selects = [ IntrinSelect<int_dx_flattened_thread_id_in_group> ];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
   let stages = [Stages<DXIL1_0, [compute, mesh, amplification, node]>];
@@ -910,7 +931,7 @@ def WaveActiveAnyTrue : DXILOp<113, waveAnyTrue> {
 
 def WaveIsFirstLane :  DXILOp<110, waveIsFirstLane> {
   let Doc = "returns 1 for the first lane in the wave";
-  let LLVMIntrinsic = int_dx_wave_is_first_lane;
+  let intrinsic_selects = [ IntrinSelect<int_dx_wave_is_first_lane> ];
   let arguments = [];
   let result = Int1Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
@@ -919,7 +940,7 @@ def WaveIsFirstLane :  DXILOp<110, waveIsFirstLane> {
 
 def WaveReadLaneAt:  DXILOp<117, waveReadLaneAt> {
   let Doc = "returns the value from the specified lane";
-  let LLVMIntrinsic = int_dx_wave_readlane;
+  let intrinsic_selects = [ IntrinSelect<int_dx_wave_readlane> ];
   let arguments = [OverloadTy, Int32Ty];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy, Int1Ty, Int16Ty, Int32Ty, Int64Ty]>];
@@ -929,7 +950,7 @@ def WaveReadLaneAt:  DXILOp<117, waveReadLaneAt> {
 
 def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
   let Doc = "returns the index of the current lane in the wave";
-  let LLVMIntrinsic = int_dx_wave_getlaneindex;
+  let intrinsic_selects = [ IntrinSelect<int_dx_wave_getlaneindex> ];
   let arguments = [];
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 37ce442bdcdb85..fbf9891b7c9853 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -184,13 +184,6 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
                            OpName);
   }
 
-  {
-    DXILIntrinsicSelect IntrSelect;
-    IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("LLVMIntrinsic"));
-    if (IntrSelect.Intrinsic.size())
-      IntrinsicSelects.emplace_back(std::move(IntrSelect));
-  }
-
   auto IntrinsicSelectRecords = R->getValueAsListOfDefs("intrinsic_selects");
   if (IntrinsicSelectRecords.size()) {
     if (IntrinsicSelects.size()) {

>From c7ead3ec4a90c25cc58b3f83d5fa6a8e707d4852 Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Tue, 12 Nov 2024 17:09:58 -0800
Subject: [PATCH 20/24] Formatting, and missing comment

---
 llvm/lib/Target/DirectX/DXILOpLowering.cpp |  9 +++++----
 llvm/utils/TableGen/DXILEmitter.cpp        | 17 +++++++++++------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index fc162b71c0786d..f07f1bddbc8c0d 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -109,8 +109,9 @@ class OpLowerer {
 #define DXIL_OP_INTRINSIC_ARG_SELECT_TYPES
 #include "DXILOperation.inc"
 
-  [[nodiscard]] bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
-                                           ArrayRef<IntrinArgSelect> ArgSelects) {
+  [[nodiscard]] bool
+  replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
+                        ArrayRef<IntrinArgSelect> ArgSelects) {
     bool IsVectorArgExpansion = isVectorArgExpansion(F);
     return replaceFunction(F, [&](CallInst *CI) -> Error {
       OpBuilder.getIRB().SetInsertPoint(CI);
@@ -660,8 +661,8 @@ class OpLowerer {
         continue;
 #define DXIL_OP_INTRINSIC(OpCode, Intrin, ...)                                 \
   case Intrin:                                                                 \
-    HasErrors |=                                                               \
-        replaceFunctionWithOp(F, OpCode, ArrayRef<IntrinArgSelect>{__VA_ARGS__});    \
+    HasErrors |= replaceFunctionWithOp(                                        \
+        F, OpCode, ArrayRef<IntrinArgSelect>{__VA_ARGS__});                    \
     break;
 #include "DXILOperation.inc"
       case Intrinsic::dx_handle_fromBinding:
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index fbf9891b7c9853..c5431010828882 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -40,7 +40,8 @@ struct DXILIntrinsicSelect {
 static StringRef StripIntrinArgSelectTypePrefix(StringRef Type) {
   StringRef Prefix = "IntrinArgSelect_";
   if (!Type.starts_with(Prefix)) {
-    PrintFatalError("IntrinArgSelectType definintion must be prefixed with 'IntrinArgSelect_'");
+    PrintFatalError("IntrinArgSelectType definintion must be prefixed with "
+                    "'IntrinArgSelect_'");
   }
   return Type.substr(Prefix.size());
 }
@@ -421,11 +422,12 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
       OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName
          << ", Intrinsic::" << MappedIntr.Intrinsic << ", ";
       for (const Record *ArgSelect : MappedIntr.ArgSelectRecords) {
-        std::string Type = ArgSelect->getValueAsDef("type")->getNameInitAsString();
+        std::string Type =
+            ArgSelect->getValueAsDef("type")->getNameInitAsString();
         int Value = ArgSelect->getValueAsInt("value");
         OS << "(IntrinArgSelect{"
-          << "IntrinArgSelect::Type::" << StripIntrinArgSelectTypePrefix(Type) << ","
-          << Value << "}), ";
+           << "IntrinArgSelect::Type::" << StripIntrinArgSelectTypePrefix(Type)
+           << "," << Value << "}), ";
       }
       OS << ")\n";
     }
@@ -435,11 +437,14 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
   OS << "#endif\n\n";
 }
 
-static void emitDXILIntrinsicArgSelectTypes(const RecordKeeper &Records, raw_ostream &OS) {
+/// Emit the IntrinArgSelect type for DirectX intrinsic to DXIL Op lowering
+static void emitDXILIntrinsicArgSelectTypes(const RecordKeeper &Records,
+                                            raw_ostream &OS) {
   OS << "#ifdef DXIL_OP_INTRINSIC_ARG_SELECT_TYPES\n";
   OS << "struct IntrinArgSelect {\n";
   OS << "  enum class Type {\n";
-  for (const Record *Records : Records.getAllDerivedDefinitions("IntrinArgSelectType")) {
+  for (const Record *Records :
+       Records.getAllDerivedDefinitions("IntrinArgSelectType")) {
     StringRef StrippedName = StripIntrinArgSelectTypePrefix(Records->getName());
     OS << "    " << StrippedName << ",\n";
   }

>From 91f696268767b4888c74357c840d4ed4f364ef1e Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Tue, 12 Nov 2024 17:17:58 -0800
Subject: [PATCH 21/24] Fixed using the old name after renaming the type

---
 llvm/lib/Target/DirectX/DXIL.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index ffdec9dfc2698f..ac1fc12f39e0cf 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -357,7 +357,7 @@ class DXILOp<int opcode, DXILOpClass opclass> {
   DXILOpClass OpClass = opclass;
 
   // LLVM Intrinsics DXIL Operation maps from
-  list<IntrinWithArgs> intrinsic_selects = [];
+  list<IntrinSelect> intrinsic_selects = [];
 
   // Result type of the op
   DXILOpParamType result;
@@ -968,7 +968,7 @@ def WaveAllBitCount : DXILOp<135, waveAllOp> {
 def Barrier : DXILOp<80, barrier> {
   let Doc = "inserts a memory barrier in the shader";
   let intrinsic_selects = [
-    IntrinWithArgs<
+    IntrinSelect<
         int_dx_group_memory_barrier_with_group_sync,
         [ IntrinArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync> ]>,
   ];

>From 122c675ca3a72f2d410ae3c8b5c8559bb1db5915 Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Thu, 14 Nov 2024 15:53:57 -0800
Subject: [PATCH 22/24] Addressed more feedback

---
 llvm/lib/Target/DirectX/DXIL.td            | 103 +++++++++++----------
 llvm/lib/Target/DirectX/DXILOpLowering.cpp |  11 ++-
 llvm/utils/TableGen/DXILEmitter.cpp        |  35 +++----
 3 files changed, 74 insertions(+), 75 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index ac1fc12f39e0cf..fc95a50de98ed9 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -316,22 +316,25 @@ class IntrinArgI8   <int value> : IntrinArgSelect<IntrinArgSelect_I8,    value>;
 class IntrinArgI32  <int value> : IntrinArgSelect<IntrinArgSelect_I32,   value>;
 
 // Select which intrinsic to lower from for a DXILOp.
-// If the intrinsic is the only argument given to IntrinSelect, then the arguments of the intrinsic will be copied
-// in the same order. Example:
-//   let intrinsic_selects = [
+// If the intrinsic is the only argument given to IntrinSelect, then the
+// arguments of the intrinsic will be copied in the same order. Example:
+//   let intrinsics = [
 //     IntrinSelect<int_dx_my_intrinsic>,
 //     IntrinSelect<int_dx_my_intrinsic2>,
 //   ]
 //=========================================================================================
-// Using IntrinArgIndex<>, arguments of the intrinsic can be copied in specific order:
-//   let intrinsic_selects = [
+// Using IntrinArgIndex<>, arguments of the intrinsic can be copied to the DXIL
+// OP in specific order:
+//   let intrinsics = [
 //     IntrinSelect<int_dx_my_intrinsic,
 //       [IntrinArgIndex<2>, IntrinArgIndex<1>, IntrinArgIndex<0>> ]
 //     >,
 //   ]
 //=========================================================================================
-// to the dxil op. This can be used in conjunction with IntrinArgIndex:
-//   let intrinsic_selects = [
+// Using IntrinArgI8<> and IntrinArgI32<>, integer constants can be added
+// directly to the dxil op. This can be used in conjunction with
+// IntrinArgIndex:
+//   let intrinsics = [
 //     IntrinSelect<int_dx_wave_active_usum,
 //       [ IntrinArgIndex<0>, IntrinArgI8<0>, IntrinArgI8<1> ]
 //     >,
@@ -357,7 +360,7 @@ class DXILOp<int opcode, DXILOpClass opclass> {
   DXILOpClass OpClass = opclass;
 
   // LLVM Intrinsics DXIL Operation maps from
-  list<IntrinSelect> intrinsic_selects = [];
+  list<IntrinSelect> intrinsics = [];
 
   // Result type of the op
   DXILOpParamType result;
@@ -379,7 +382,7 @@ class DXILOp<int opcode, DXILOpClass opclass> {
 
 def Abs :  DXILOp<6, unary> {
   let Doc = "Returns the absolute value of the input.";
-  let intrinsic_selects = [ IntrinSelect<int_fabs> ];
+  let intrinsics = [ IntrinSelect<int_fabs> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy]>];
@@ -389,7 +392,7 @@ def Abs :  DXILOp<6, unary> {
 
 def Saturate :  DXILOp<7, unary> {
   let Doc = "Clamps a single or double precision floating point value to [0.0f...1.0f].";
-  let intrinsic_selects = [ IntrinSelect<int_dx_saturate> ];
+  let intrinsics = [ IntrinSelect<int_dx_saturate> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy]>];
@@ -399,7 +402,7 @@ def Saturate :  DXILOp<7, unary> {
 
 def IsInf :  DXILOp<9, isSpecialFloat> {
   let Doc = "Determines if the specified value is infinite.";
-  let intrinsic_selects = [ IntrinSelect<int_dx_isinf> ];
+  let intrinsics = [ IntrinSelect<int_dx_isinf> ];
   let arguments = [OverloadTy];
   let result = Int1Ty;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -409,7 +412,7 @@ def IsInf :  DXILOp<9, isSpecialFloat> {
 
 def Cos :  DXILOp<12, unary> {
   let Doc = "Returns cosine(theta) for theta in radians.";
-  let intrinsic_selects = [ IntrinSelect<int_cos> ];
+  let intrinsics = [ IntrinSelect<int_cos> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -419,7 +422,7 @@ def Cos :  DXILOp<12, unary> {
 
 def Sin :  DXILOp<13, unary> {
   let Doc = "Returns sine(theta) for theta in radians.";
-  let intrinsic_selects = [ IntrinSelect<int_sin> ];
+  let intrinsics = [ IntrinSelect<int_sin> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -429,7 +432,7 @@ def Sin :  DXILOp<13, unary> {
 
 def Tan :  DXILOp<14, unary> {
   let Doc = "Returns tangent(theta) for theta in radians.";
-  let intrinsic_selects = [ IntrinSelect<int_tan> ];
+  let intrinsics = [ IntrinSelect<int_tan> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -439,7 +442,7 @@ def Tan :  DXILOp<14, unary> {
 
 def ACos :  DXILOp<15, unary> {
   let Doc = "Returns the arccosine of the specified value.";
-  let intrinsic_selects = [ IntrinSelect<int_acos> ];
+  let intrinsics = [ IntrinSelect<int_acos> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -449,7 +452,7 @@ def ACos :  DXILOp<15, unary> {
 
 def ASin :  DXILOp<16, unary> {
   let Doc = "Returns the arcsine of the specified value.";
-  let intrinsic_selects = [ IntrinSelect<int_asin> ];
+  let intrinsics = [ IntrinSelect<int_asin> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -459,7 +462,7 @@ def ASin :  DXILOp<16, unary> {
 
 def ATan :  DXILOp<17, unary> {
   let Doc = "Returns the arctangent of the specified value.";
-  let intrinsic_selects = [ IntrinSelect<int_atan> ];
+  let intrinsics = [ IntrinSelect<int_atan> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -469,7 +472,7 @@ def ATan :  DXILOp<17, unary> {
 
 def HCos :  DXILOp<18, unary> {
   let Doc = "Returns the hyperbolic cosine of the specified value.";
-  let intrinsic_selects = [ IntrinSelect<int_cosh> ];
+  let intrinsics = [ IntrinSelect<int_cosh> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -479,7 +482,7 @@ def HCos :  DXILOp<18, unary> {
 
 def HSin :  DXILOp<19, unary> {
   let Doc = "Returns the hyperbolic sine of the specified value.";
-  let intrinsic_selects = [ IntrinSelect<int_sinh> ];
+  let intrinsics = [ IntrinSelect<int_sinh> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -489,7 +492,7 @@ def HSin :  DXILOp<19, unary> {
 
 def HTan :  DXILOp<20, unary> {
   let Doc = "Returns the hyperbolic tan of the specified value.";
-  let intrinsic_selects = [ IntrinSelect<int_tanh> ];
+  let intrinsics = [ IntrinSelect<int_tanh> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -500,7 +503,7 @@ def HTan :  DXILOp<20, unary> {
 def Exp2 :  DXILOp<21, unary> {
   let Doc = "Returns the base 2 exponential, or 2**x, of the specified value. "
             "exp2(x) = 2**x.";
-  let intrinsic_selects = [ IntrinSelect<int_exp2> ];
+  let intrinsics = [ IntrinSelect<int_exp2> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -511,7 +514,7 @@ def Exp2 :  DXILOp<21, unary> {
 def Frac :  DXILOp<22, unary> {
   let Doc = "Returns a fraction from 0 to 1 that represents the decimal part "
             "of the input.";
-  let intrinsic_selects = [ IntrinSelect<int_dx_frac> ];
+  let intrinsics = [ IntrinSelect<int_dx_frac> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -521,7 +524,7 @@ def Frac :  DXILOp<22, unary> {
 
 def Log2 :  DXILOp<23, unary> {
   let Doc = "Returns the base-2 logarithm of the specified value.";
-  let intrinsic_selects = [ IntrinSelect<int_log2> ];
+  let intrinsics = [ IntrinSelect<int_log2> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -532,7 +535,7 @@ def Log2 :  DXILOp<23, unary> {
 def Sqrt :  DXILOp<24, unary> {
   let Doc = "Returns the square root of the specified floating-point value, "
             "per component.";
-  let intrinsic_selects = [ IntrinSelect<int_sqrt> ];
+  let intrinsics = [ IntrinSelect<int_sqrt> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -543,7 +546,7 @@ def Sqrt :  DXILOp<24, unary> {
 def RSqrt :  DXILOp<25, unary> {
   let Doc = "Returns the reciprocal of the square root of the specified value. "
             "rsqrt(x) = 1 / sqrt(x).";
-  let intrinsic_selects = [ IntrinSelect<int_dx_rsqrt> ];
+  let intrinsics = [ IntrinSelect<int_dx_rsqrt> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -554,7 +557,7 @@ def RSqrt :  DXILOp<25, unary> {
 def Round :  DXILOp<26, unary> {
   let Doc = "Returns the input rounded to the nearest integer within a "
             "floating-point type.";
-  let intrinsic_selects = [ IntrinSelect<int_roundeven> ];
+  let intrinsics = [ IntrinSelect<int_roundeven> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -565,7 +568,7 @@ def Round :  DXILOp<26, unary> {
 def Floor :  DXILOp<27, unary> {
   let Doc =
       "Returns the largest integer that is less than or equal to the input.";
-  let intrinsic_selects = [ IntrinSelect<int_floor> ];
+  let intrinsics = [ IntrinSelect<int_floor> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -576,7 +579,7 @@ def Floor :  DXILOp<27, unary> {
 def Ceil :  DXILOp<28, unary> {
   let Doc = "Returns the smallest integer that is greater than or equal to the "
             "input.";
-  let intrinsic_selects = [ IntrinSelect<int_ceil> ];
+  let intrinsics = [ IntrinSelect<int_ceil> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -586,7 +589,7 @@ def Ceil :  DXILOp<28, unary> {
 
 def Trunc :  DXILOp<29, unary> {
   let Doc = "Returns the specified value truncated to the integer component.";
-  let intrinsic_selects = [ IntrinSelect<int_trunc> ];
+  let intrinsics = [ IntrinSelect<int_trunc> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -596,7 +599,7 @@ def Trunc :  DXILOp<29, unary> {
 
 def Rbits :  DXILOp<30, unary> {
   let Doc = "Returns the specified value with its bits reversed.";
-  let intrinsic_selects = [ IntrinSelect<int_bitreverse> ];
+  let intrinsics = [ IntrinSelect<int_bitreverse> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -641,7 +644,7 @@ def FirstbitSHi :  DXILOp<34, unaryBits> {
 
 def FMax :  DXILOp<35, binary> {
   let Doc = "Float maximum. FMax(a,b) = a > b ? a : b";
-  let intrinsic_selects = [ IntrinSelect<int_maxnum> ];
+  let intrinsics = [ IntrinSelect<int_maxnum> ];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -652,7 +655,7 @@ def FMax :  DXILOp<35, binary> {
 
 def FMin :  DXILOp<36, binary> {
   let Doc = "Float minimum. FMin(a,b) = a < b ? a : b";
-  let intrinsic_selects = [ IntrinSelect<int_minnum> ];
+  let intrinsics = [ IntrinSelect<int_minnum> ];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -663,7 +666,7 @@ def FMin :  DXILOp<36, binary> {
 
 def SMax :  DXILOp<37, binary> {
   let Doc = "Signed integer maximum. SMax(a,b) = a > b ? a : b";
-  let intrinsic_selects = [ IntrinSelect<int_smax> ];
+  let intrinsics = [ IntrinSelect<int_smax> ];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -674,7 +677,7 @@ def SMax :  DXILOp<37, binary> {
 
 def SMin :  DXILOp<38, binary> {
   let Doc = "Signed integer minimum. SMin(a,b) = a < b ? a : b";
-  let intrinsic_selects = [ IntrinSelect<int_smin> ];
+  let intrinsics = [ IntrinSelect<int_smin> ];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -685,7 +688,7 @@ def SMin :  DXILOp<38, binary> {
 
 def UMax :  DXILOp<39, binary> {
   let Doc = "Unsigned integer maximum. UMax(a,b) = a > b ? a : b";
-  let intrinsic_selects = [ IntrinSelect<int_umax> ];
+  let intrinsics = [ IntrinSelect<int_umax> ];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -696,7 +699,7 @@ def UMax :  DXILOp<39, binary> {
 
 def UMin :  DXILOp<40, binary> {
   let Doc = "Unsigned integer minimum. UMin(a,b) = a < b ? a : b";
-  let intrinsic_selects = [ IntrinSelect<int_umin> ];
+  let intrinsics = [ IntrinSelect<int_umin> ];
   let arguments = [OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -708,7 +711,7 @@ def UMin :  DXILOp<40, binary> {
 def FMad :  DXILOp<46, tertiary> {
   let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m "
             "* a + b.";
-  let intrinsic_selects = [ IntrinSelect<int_fmuladd> ];
+  let intrinsics = [ IntrinSelect<int_fmuladd> ];
   let arguments = [OverloadTy, OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -720,7 +723,7 @@ def FMad :  DXILOp<46, tertiary> {
 def IMad :  DXILOp<48, tertiary> {
   let Doc = "Signed integer arithmetic multiply/add operation. imad(m,a,b) = m "
             "* a + b.";
-  let intrinsic_selects = [ IntrinSelect<int_dx_imad> ];
+  let intrinsics = [ IntrinSelect<int_dx_imad> ];
   let arguments = [OverloadTy, OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -732,7 +735,7 @@ def IMad :  DXILOp<48, tertiary> {
 def UMad :  DXILOp<49, tertiary> {
   let Doc = "Unsigned integer arithmetic multiply/add operation. umad(m,a, = m "
             "* a + b.";
-  let intrinsic_selects = [ IntrinSelect<int_dx_umad> ];
+  let intrinsics = [ IntrinSelect<int_dx_umad> ];
   let arguments = [OverloadTy, OverloadTy, OverloadTy];
   let result = OverloadTy;
   let overloads =
@@ -744,7 +747,7 @@ def UMad :  DXILOp<49, tertiary> {
 def Dot2 :  DXILOp<54, dot2> {
   let Doc = "dot product of two float vectors Dot(a,b) = a[0]*b[0] + ... + "
             "a[n]*b[n] where n is 0 to 1 inclusive";
-  let intrinsic_selects = [ IntrinSelect<int_dx_dot2> ];
+  let intrinsics = [ IntrinSelect<int_dx_dot2> ];
   let arguments = !listsplat(OverloadTy, 4);
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -755,7 +758,7 @@ def Dot2 :  DXILOp<54, dot2> {
 def Dot3 :  DXILOp<55, dot3> {
   let Doc = "dot product of two float vectors Dot(a,b) = a[0]*b[0] + ... + "
             "a[n]*b[n] where n is 0 to 2 inclusive";
-  let intrinsic_selects = [ IntrinSelect<int_dx_dot3> ];
+  let intrinsics = [ IntrinSelect<int_dx_dot3> ];
   let arguments = !listsplat(OverloadTy, 6);
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -766,7 +769,7 @@ def Dot3 :  DXILOp<55, dot3> {
 def Dot4 :  DXILOp<56, dot4> {
   let Doc = "dot product of two float vectors Dot(a,b) = a[0]*b[0] + ... + "
             "a[n]*b[n] where n is 0 to 3 inclusive";
-  let intrinsic_selects = [ IntrinSelect<int_dx_dot4> ];
+  let intrinsics = [ IntrinSelect<int_dx_dot4> ];
   let arguments = !listsplat(OverloadTy, 8);
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
@@ -831,7 +834,7 @@ def Discard : DXILOp<82, discard> {
 
 def ThreadId :  DXILOp<93, threadId> {
   let Doc = "Reads the thread ID";
-  let intrinsic_selects = [ IntrinSelect<int_dx_thread_id> ];
+  let intrinsics = [ IntrinSelect<int_dx_thread_id> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
@@ -841,7 +844,7 @@ def ThreadId :  DXILOp<93, threadId> {
 
 def GroupId :  DXILOp<94, groupId> {
   let Doc = "Reads the group ID (SV_GroupID)";
-  let intrinsic_selects = [ IntrinSelect<int_dx_group_id> ];
+  let intrinsics = [ IntrinSelect<int_dx_group_id> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
@@ -851,7 +854,7 @@ def GroupId :  DXILOp<94, groupId> {
 
 def ThreadIdInGroup :  DXILOp<95, threadIdInGroup> {
   let Doc = "Reads the thread ID within the group  (SV_GroupThreadID)";
-  let intrinsic_selects = [ IntrinSelect<int_dx_thread_id_in_group> ];
+  let intrinsics = [ IntrinSelect<int_dx_thread_id_in_group> ];
   let arguments = [OverloadTy];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
@@ -862,7 +865,7 @@ def ThreadIdInGroup :  DXILOp<95, threadIdInGroup> {
 def FlattenedThreadIdInGroup :  DXILOp<96, flattenedThreadIdInGroup> {
   let Doc = "Provides a flattened index for a given thread within a given "
             "group (SV_GroupIndex)";
-  let intrinsic_selects = [ IntrinSelect<int_dx_flattened_thread_id_in_group> ];
+  let intrinsics = [ IntrinSelect<int_dx_flattened_thread_id_in_group> ];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
   let stages = [Stages<DXIL1_0, [compute, mesh, amplification, node]>];
@@ -931,7 +934,7 @@ def WaveActiveAnyTrue : DXILOp<113, waveAnyTrue> {
 
 def WaveIsFirstLane :  DXILOp<110, waveIsFirstLane> {
   let Doc = "returns 1 for the first lane in the wave";
-  let intrinsic_selects = [ IntrinSelect<int_dx_wave_is_first_lane> ];
+  let intrinsics = [ IntrinSelect<int_dx_wave_is_first_lane> ];
   let arguments = [];
   let result = Int1Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
@@ -940,7 +943,7 @@ def WaveIsFirstLane :  DXILOp<110, waveIsFirstLane> {
 
 def WaveReadLaneAt:  DXILOp<117, waveReadLaneAt> {
   let Doc = "returns the value from the specified lane";
-  let intrinsic_selects = [ IntrinSelect<int_dx_wave_readlane> ];
+  let intrinsics = [ IntrinSelect<int_dx_wave_readlane> ];
   let arguments = [OverloadTy, Int32Ty];
   let result = OverloadTy;
   let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, DoubleTy, Int1Ty, Int16Ty, Int32Ty, Int64Ty]>];
@@ -950,7 +953,7 @@ def WaveReadLaneAt:  DXILOp<117, waveReadLaneAt> {
 
 def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
   let Doc = "returns the index of the current lane in the wave";
-  let intrinsic_selects = [ IntrinSelect<int_dx_wave_getlaneindex> ];
+  let intrinsics = [ IntrinSelect<int_dx_wave_getlaneindex> ];
   let arguments = [];
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
@@ -967,7 +970,7 @@ def WaveAllBitCount : DXILOp<135, waveAllOp> {
 
 def Barrier : DXILOp<80, barrier> {
   let Doc = "inserts a memory barrier in the shader";
-  let intrinsic_selects = [
+  let intrinsics = [
     IntrinSelect<
         int_dx_group_memory_barrier_with_group_sync,
         [ IntrinArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync> ]>,
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index f07f1bddbc8c0d..1c76f084fd873b 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -106,13 +106,22 @@ class OpLowerer {
     return false;
   }
 
-#define DXIL_OP_INTRINSIC_ARG_SELECT_TYPES
+  struct IntrinArgSelect {
+    enum class Type {
+#define DXIL_OP_INTRINSIC_ARG_SELECT_TYPE(name) name,
 #include "DXILOperation.inc"
+    };
+    Type Type;
+    int Value;
+  };
 
   [[nodiscard]] bool
   replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
                         ArrayRef<IntrinArgSelect> ArgSelects) {
     bool IsVectorArgExpansion = isVectorArgExpansion(F);
+    assert(!IsVectorArgExpansion ||
+           ArgSelects.empty() &&
+               "Cann't do vector arg expansion when using arg selects.");
     return replaceFunction(F, [&](CallInst *CI) -> Error {
       OpBuilder.getIRB().SetInsertPoint(CI);
       SmallVector<Value *> Args;
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index c5431010828882..a0c93bed5ad834 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -185,23 +185,16 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
                            OpName);
   }
 
-  auto IntrinsicSelectRecords = R->getValueAsListOfDefs("intrinsic_selects");
+  auto IntrinsicSelectRecords = R->getValueAsListOfDefs("intrinsics");
   if (IntrinsicSelectRecords.size()) {
-    if (IntrinsicSelects.size()) {
-      PrintFatalError(
-          R, Twine("LLVMIntrinsic and intrinsic_selects cannot be both "
-                   "defined for DXIL operation - ") +
-                 OpName);
-    } else {
-      for (const Record *R : IntrinsicSelectRecords) {
-        DXILIntrinsicSelect IntrSelect;
-        IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("intrinsic"));
-        auto Args = R->getValueAsListOfDefs("arg_selects");
-        for (const Record *ArgSelect : Args) {
-          IntrSelect.ArgSelectRecords.emplace_back(ArgSelect);
-        }
-        IntrinsicSelects.emplace_back(std::move(IntrSelect));
+    for (const Record *R : IntrinsicSelectRecords) {
+      DXILIntrinsicSelect IntrSelect;
+      IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("intrinsic"));
+      auto Args = R->getValueAsListOfDefs("arg_selects");
+      for (const Record *ArgSelect : Args) {
+        IntrSelect.ArgSelectRecords.emplace_back(ArgSelect);
       }
+      IntrinsicSelects.emplace_back(std::move(IntrSelect));
     }
   }
 }
@@ -440,19 +433,13 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
 /// Emit the IntrinArgSelect type for DirectX intrinsic to DXIL Op lowering
 static void emitDXILIntrinsicArgSelectTypes(const RecordKeeper &Records,
                                             raw_ostream &OS) {
-  OS << "#ifdef DXIL_OP_INTRINSIC_ARG_SELECT_TYPES\n";
-  OS << "struct IntrinArgSelect {\n";
-  OS << "  enum class Type {\n";
+  OS << "#ifdef DXIL_OP_INTRINSIC_ARG_SELECT_TYPE\n";
   for (const Record *Records :
        Records.getAllDerivedDefinitions("IntrinArgSelectType")) {
     StringRef StrippedName = StripIntrinArgSelectTypePrefix(Records->getName());
-    OS << "    " << StrippedName << ",\n";
+    OS << "DXIL_OP_INTRINSIC_ARG_SELECT_TYPE(" << StrippedName << ")\n";
   }
-  OS << "  };\n";
-  OS << "  Type Type;\n";
-  OS << "  int Value;\n";
-  OS << "};\n";
-  OS << "#undef DXIL_OP_INTRINSIC_ARG_SELECT_TYPES\n";
+  OS << "#undef DXIL_OP_INTRINSIC_ARG_SELECT_TYPE\n";
   OS << "#endif\n\n";
 }
 

>From 4acbe4341211c30ed34e33c048bed5b3fd2cdab0 Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Thu, 14 Nov 2024 16:04:47 -0800
Subject: [PATCH 23/24] Made the assert less confusing

---
 llvm/lib/Target/DirectX/DXILOpLowering.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 1c76f084fd873b..a0d46efd1763ea 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -119,9 +119,8 @@ class OpLowerer {
   replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
                         ArrayRef<IntrinArgSelect> ArgSelects) {
     bool IsVectorArgExpansion = isVectorArgExpansion(F);
-    assert(!IsVectorArgExpansion ||
-           ArgSelects.empty() &&
-               "Cann't do vector arg expansion when using arg selects.");
+    assert(!(IsVectorArgExpansion && ArgSelects.size()) &&
+           "Cann't do vector arg expansion when using arg selects.");
     return replaceFunction(F, [&](CallInst *CI) -> Error {
       OpBuilder.getIRB().SetInsertPoint(CI);
       SmallVector<Value *> Args;

>From 1aaa5f7f27d173ef7aa6906309beea2c99e845f1 Mon Sep 17 00:00:00 2001
From: Adam Yang <hanbyang at microsoft.com>
Date: Sun, 1 Dec 2024 16:53:09 -0800
Subject: [PATCH 24/24] Fixed bad rebase and fixed new intrinsics added from
 main

---
 llvm/lib/Target/DirectX/DXIL.td               | 17 ++++++++-------
 .../Target/DirectX/DXILIntrinsicExpansion.cpp | 21 -------------------
 2 files changed, 9 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index fc95a50de98ed9..cff6cdce813ded 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -621,7 +621,7 @@ def CountBits :  DXILOp<31, unaryBits> {
 def FirstbitHi :  DXILOp<33, unaryBits> {
   let Doc = "Returns the location of the first set bit starting from "
             "the highest order bit and working downward.";
-  let LLVMIntrinsic = int_dx_firstbituhigh;
+  let intrinsics = [ IntrinSelect<int_dx_firstbituhigh> ];
   let arguments = [OverloadTy];
   let result = Int32Ty;
   let overloads =
@@ -633,7 +633,7 @@ def FirstbitHi :  DXILOp<33, unaryBits> {
 def FirstbitSHi :  DXILOp<34, unaryBits> {
   let Doc = "Returns the location of the first set bit from "
             "the highest order bit based on the sign.";
-  let LLVMIntrinsic = int_dx_firstbitshigh;
+  let intrinsics = [ IntrinSelect<int_dx_firstbitshigh> ];
   let arguments = [OverloadTy];
   let result = Int32Ty;
   let overloads =
@@ -826,7 +826,7 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> {
 
 def Discard : DXILOp<82, discard> {
   let Doc = "discard the current pixel";
-  let LLVMIntrinsic = int_dx_discard;
+  let intrinsics = [ IntrinSelect<int_dx_discard> ];
   let arguments = [Int1Ty];
   let result = VoidTy;
   let stages = [Stages<DXIL1_0, [pixel]>];
@@ -874,7 +874,7 @@ def FlattenedThreadIdInGroup :  DXILOp<96, flattenedThreadIdInGroup> {
 
 def MakeDouble :  DXILOp<101, makeDouble> {
   let Doc = "creates a double value";
-  let LLVMIntrinsic = int_dx_asdouble;
+  let intrinsics = [ IntrinSelect<int_dx_asdouble> ];
   let arguments = [Int32Ty, Int32Ty];
   let result = DoubleTy;
   let stages = [Stages<DXIL1_0, [all_stages]>];
@@ -893,7 +893,7 @@ def SplitDouble :  DXILOp<102, splitDouble> {
 def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
   let Doc = "signed dot product of 4 x i8 vectors packed into i32, with "
             "accumulate to i32";
-  let LLVMIntrinsic = int_dx_dot4add_i8packed;
+  let intrinsics = [ IntrinSelect<int_dx_dot4add_i8packed> ];
   let arguments = [Int32Ty, Int32Ty, Int32Ty];
   let result = Int32Ty;
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
@@ -903,7 +903,7 @@ def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
 def Dot4AddU8Packed : DXILOp<164, dot4AddPacked> {
   let Doc = "unsigned dot product of 4 x i8 vectors packed into i32, with "
             "accumulate to i32";
-  let LLVMIntrinsic = int_dx_dot4add_u8packed;
+  let intrinsics = [ IntrinSelect<int_dx_dot4add_u8packed> ];
   let arguments = [Int32Ty, Int32Ty, Int32Ty];
   let result = Int32Ty;
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
@@ -926,7 +926,7 @@ def CreateHandleFromBinding : DXILOp<217, createHandleFromBinding> {
 
 def WaveActiveAnyTrue : DXILOp<113, waveAnyTrue> {
   let Doc = "returns true if the expression is true in any of the active lanes in the current wave";
-  let LLVMIntrinsic = int_dx_wave_any;
+  let intrinsics = [ IntrinSelect<int_dx_wave_any> ];
   let arguments = [Int1Ty];
   let result = Int1Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
@@ -962,11 +962,12 @@ def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
 
 def WaveAllBitCount : DXILOp<135, waveAllOp> {
   let Doc = "returns the count of bits set to 1 across the wave";
-  let LLVMIntrinsic = int_dx_wave_active_countbits;
+  let intrinsics = [ IntrinSelect<int_dx_wave_active_countbits> ];
   let arguments = [Int1Ty];
   let result = Int32Ty;
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
 
 def Barrier : DXILOp<80, barrier> {
   let Doc = "inserts a memory barrier in the shader";
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 3fdfbaa659bd88..d2bfca1fada559 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -453,27 +453,6 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
   return Builder.CreateFMul(X, PiOver180);
 }
 
-static Value *expandMemoryBarrier(CallInst *Orig, Intrinsic::ID IntrinsicId) {
-  assert(IntrinsicId == Intrinsic::dx_group_memory_barrier_with_group_sync);
-  unsigned BarrierMode = 0;
-  switch (IntrinsicId) {
-  case Intrinsic::dx_group_memory_barrier_with_group_sync:
-    BarrierMode = (unsigned)dxil::BarrierMode::TGSMFence |
-                  (unsigned)dxil::BarrierMode::SyncThreadGroup;
-    break;
-  default:
-    report_fatal_error(Twine("Unexpected memory barrier intrinsic."),
-                       /* gen_crash_diag=*/false);
-    break;
-  }
-
-  IRBuilder<> Builder(Orig);
-  return Builder.CreateIntrinsic(
-      Builder.getVoidTy(), Intrinsic::dx_memory_barrier,
-      ArrayRef<Value *>{Builder.getInt32(BarrierMode)}, nullptr,
-      Orig->getName());
-}
-
 static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic) {
   if (ClampIntrinsic == Intrinsic::dx_uclamp)
     return Intrinsic::umax;



More information about the llvm-commits mailing list