[llvm] [DXIL] Add GroupMemoryBarrierWithGroupSync intrinsic (PR #114349)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 11 14:40:08 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-directx

Author: Adam Yang (adam-yang)

<details>
<summary>Changes</summary>

fixes #<!-- -->112974
partially fixes #<!-- -->70103

This change was reverted so some issues could be fixed.

### Changes
- Added new tablegen based way of lowering dx intrinsics to DXIL ops.
- Added int_dx_group_memory_barrier_with_group_sync intrinsic in IntrinsicsDirectX.td
- Added expansion for int_dx_group_memory_barrier_with_group_sync in DXILIntrinsicExpansion.cpp`
- Added DXIL backend test case

### Related PRs
* [[clang][HLSL] Add GroupMemoryBarrierWithGroupSync intrinsic #<!-- -->111883](https://github.com/llvm/llvm-project/pull/111883)
* [[SPIRV] Add GroupMemoryBarrierWithGroupSync intrinsic #<!-- -->111888](https://github.com/llvm/llvm-project/pull/111888)

---
Full diff: https://github.com/llvm/llvm-project/pull/114349.diff


5 Files Affected:

- (modified) llvm/include/llvm/IR/IntrinsicsDirectX.td (+2) 
- (modified) llvm/lib/Target/DirectX/DXIL.td (+48) 
- (modified) llvm/lib/Target/DirectX/DXILOpLowering.cpp (+34-9) 
- (added) llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll (+8) 
- (modified) llvm/utils/TableGen/DXILEmitter.cpp (+99-13) 


``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index e30d37f69f781e..dada426368995d 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -92,4 +92,6 @@ def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, L
 def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], 
     [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
 def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
 }
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index af12b74351058e..bb07c9492a1a4b 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -294,6 +294,37 @@ class Attributes<Version ver = DXIL1_0, list<DXILAttribute> attrs> {
   list<DXILAttribute> op_attrs = attrs;
 }
 
+defvar BarrierMode_DeviceMemoryBarrier              = 2;
+defvar BarrierMode_DeviceMemoryBarrierWithGroupSync = 3;
+defvar BarrierMode_GroupMemoryBarrier               = 8;
+defvar BarrierMode_GroupMemoryBarrierWithGroupSync  = 9;
+defvar BarrierMode_AllMemoryBarrier                 = 10;
+defvar BarrierMode_AllMemoryBarrierWithGroupSync    = 11;
+
+// Intrinsic arg selection
+class Arg {
+  int index = -1;
+  int value = 0;
+  bit is_i8 = 0;
+  bit is_i32 = 0;
+}
+class ArgSelect<int index_> : Arg {
+  let index = index_;
+}
+class ArgI32<int value_> : Arg {
+  let value = value_;
+  let is_i32 = 1;
+}
+class ArgI8<int value_> : Arg {
+  let value = value_;
+  let is_i8 = 1;
+}
+
+class IntrinsicSelect<Intrinsic intrinsic_, list<Arg> args_> {
+  Intrinsic intrinsic = intrinsic_;
+  list<Arg> args = args_;
+}
+
 // Abstraction DXIL Operation
 class DXILOp<int opcode, DXILOpClass opclass> {
   // A short description of the operation
@@ -308,6 +339,9 @@ class DXILOp<int opcode, DXILOpClass opclass> {
   // LLVM Intrinsic DXIL Operation maps to
   Intrinsic LLVMIntrinsic = ?;
 
+  // Non-trivial LLVM Intrinsics DXIL Operation maps to
+  list<IntrinsicSelect> intrinsic_selects = [];
+
   // Result type of the op
   DXILOpParamType result;
 
@@ -829,3 +863,17 @@ def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
+
+def Barrier : DXILOp<80, barrier> {
+  let Doc = "inserts a memory barrier in the shader";
+  let intrinsic_selects = [
+    IntrinsicSelect<
+        int_dx_group_memory_barrier_with_group_sync,
+        [ ArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync> ]>,
+  ];
+
+  let arguments = [Int32Ty];
+  let result = VoidTy;
+  let stages = [Stages<DXIL1_0, [compute, library]>];
+  let attributes = [Attributes<DXIL1_0, []>];
+}
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 8acc9c1efa08c0..5f278b609690e2 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -106,17 +106,41 @@ class OpLowerer {
     return false;
   }
 
-  [[nodiscard]]
-  bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp) {
+  struct ArgSelect {
+    enum class Type {
+      Index,
+      I8,
+      I32,
+    };
+    Type Type = Type::Index;
+    int Value = -1;
+  };
+
+  [[nodiscard]] bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
+                                           ArrayRef<ArgSelect> ArgSelects) {
     bool IsVectorArgExpansion = isVectorArgExpansion(F);
     return replaceFunction(F, [&](CallInst *CI) -> Error {
-      SmallVector<Value *> Args;
       OpBuilder.getIRB().SetInsertPoint(CI);
-      if (IsVectorArgExpansion) {
-        SmallVector<Value *> NewArgs = argVectorFlatten(CI, OpBuilder.getIRB());
-        Args.append(NewArgs.begin(), NewArgs.end());
-      } else
+      SmallVector<Value *> Args;
+      if (ArgSelects.size()) {
+        for (const ArgSelect &A : ArgSelects) {
+          switch (A.Type) {
+          case ArgSelect::Type::Index:
+            Args.push_back(CI->getArgOperand(A.Value));
+            break;
+          case ArgSelect::Type::I8:
+            Args.push_back(OpBuilder.getIRB().getInt8((uint8_t)A.Value));
+            break;
+          case ArgSelect::Type::I32:
+            Args.push_back(OpBuilder.getIRB().getInt32(A.Value));
+            break;
+          }
+        }
+      } else if (IsVectorArgExpansion) {
+        Args = argVectorFlatten(CI, OpBuilder.getIRB());
+      } else {
         Args.append(CI->arg_begin(), CI->arg_end());
+      }
 
       Expected<CallInst *> OpCall =
           OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), F.getReturnType());
@@ -583,9 +607,10 @@ class OpLowerer {
       switch (ID) {
       default:
         continue;
-#define DXIL_OP_INTRINSIC(OpCode, Intrin)                                      \
+#define DXIL_OP_INTRINSIC(OpCode, Intrin, ...)                                 \
   case Intrin:                                                                 \
-    HasErrors |= replaceFunctionWithOp(F, OpCode);                             \
+    HasErrors |=                                                               \
+        replaceFunctionWithOp(F, OpCode, ArrayRef<ArgSelect>{__VA_ARGS__});    \
     break;
 #include "DXILOperation.inc"
       case Intrinsic::dx_handle_fromBinding:
diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
new file mode 100644
index 00000000000000..baf93d4e177f0f
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
@@ -0,0 +1,8 @@
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s
+
+define void @test_group_memory_barrier_with_group_sync() {
+entry:
+  ; CHECK: call void @dx.op.barrier(i32 80, i32 9)
+  call void @llvm.dx.group.memory.barrier.with.group.sync()
+  ret void
+}
\ No newline at end of file
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index e74fc00015b404..b75696db410525 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -32,6 +32,20 @@ using namespace llvm::dxil;
 
 namespace {
 
+struct DXILArgSelect {
+  enum class Type {
+    Index,
+    I32,
+    I8,
+  };
+  Type Type = Type::Index;
+  int Value = -1;
+};
+struct DXILIntrinsicSelect {
+  StringRef Intrinsic;
+  SmallVector<DXILArgSelect, 4> Args;
+};
+
 struct DXILOperationDesc {
   std::string OpName; // name of DXIL operation
   int OpCode;         // ID of DXIL operation
@@ -42,8 +56,7 @@ struct DXILOperationDesc {
   SmallVector<const Record *> OverloadRecs;
   SmallVector<const Record *> StageRecs;
   SmallVector<const Record *> AttrRecs;
-  StringRef Intrinsic; // The llvm intrinsic map to OpName. Default is "" which
-                       // means no map exists
+  SmallVector<DXILIntrinsicSelect> IntrinsicSelects;
   SmallVector<StringRef, 4>
       ShaderStages; // shader stages to which this applies, empty for all.
   int OverloadParamIndex;             // Index of parameter with overload type.
@@ -71,6 +84,21 @@ static void ascendingSortByVersion(std::vector<const Record *> &Recs) {
   });
 }
 
+/// Take a `int_{intrinsic_name}` and return just the intrinsic_name part if
+/// available. Otherwise return the empty string.
+static StringRef GetIntrinsicName(const RecordVal *RV) {
+  if (RV && RV->getValue()) {
+    if (const DefInit *DI = dyn_cast<DefInit>(RV->getValue())) {
+      auto *IntrinsicDef = DI->getDef();
+      auto DefName = IntrinsicDef->getName();
+      assert(DefName.starts_with("int_") && "invalid intrinsic name");
+      // Remove the int_ from intrinsic name.
+      return DefName.substr(4);
+    }
+  }
+  return "";
+}
+
 /// Construct an object using the DXIL Operation records specified
 /// in DXIL.td. This serves as the single source of reference of
 /// the information extracted from the specified Record R, for
@@ -157,14 +185,53 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
                            OpName);
   }
 
-  const RecordVal *RV = R->getValue("LLVMIntrinsic");
-  if (RV && RV->getValue()) {
-    if (const DefInit *DI = dyn_cast<DefInit>(RV->getValue())) {
-      auto *IntrinsicDef = DI->getDef();
-      auto DefName = IntrinsicDef->getName();
-      assert(DefName.starts_with("int_") && "invalid intrinsic name");
-      // Remove the int_ from intrinsic name.
-      Intrinsic = DefName.substr(4);
+  {
+    DXILIntrinsicSelect IntrSelect;
+    IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("LLVMIntrinsic"));
+    if (IntrSelect.Intrinsic.size())
+      IntrinsicSelects.emplace_back(std::move(IntrSelect));
+  }
+
+  auto IntrinsicSelectRecords = R->getValueAsListOfDefs("intrinsic_selects");
+  if (IntrinsicSelectRecords.size()) {
+    if (IntrinsicSelects.size()) {
+      PrintFatalError(
+          R, Twine("LLVMIntrinsic and intrinsic_selects cannot be both "
+                   "defined for DXIL operation - ") +
+                 OpName);
+    } else {
+      for (const Record *R : IntrinsicSelectRecords) {
+        DXILIntrinsicSelect IntrSelect;
+        IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("intrinsic"));
+        auto Args = R->getValueAsListOfDefs("args");
+        for (const Record *Arg : Args) {
+          bool IsI8 = Arg->getValueAsBit("is_i8");
+          bool IsI32 = Arg->getValueAsBit("is_i32");
+          int Index = Arg->getValueAsInt("index");
+          int Value = Arg->getValueAsInt("value");
+
+          DXILArgSelect ArgSelect;
+          if (IsI8) {
+            ArgSelect.Type = DXILArgSelect::Type::I8;
+            ArgSelect.Value = Value;
+          } else if (IsI32) {
+            ArgSelect.Type = DXILArgSelect::Type::I32;
+            ArgSelect.Value = Value;
+          } else {
+            if (Index < 0) {
+              PrintFatalError(
+                  R, Twine("Index in ArgSelect<index> must be equal to or "
+                           "greater than 0 for DXIL operation - ") +
+                         OpName);
+            }
+            ArgSelect.Type = DXILArgSelect::Type::Index;
+            ArgSelect.Value = Index;
+          }
+
+          IntrSelect.Args.emplace_back(std::move(ArgSelect));
+        }
+        IntrinsicSelects.emplace_back(std::move(IntrSelect));
+      }
     }
   }
 }
@@ -377,10 +444,29 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
   OS << "#ifdef DXIL_OP_INTRINSIC\n";
   OS << "\n";
   for (const auto &Op : Ops) {
-    if (Op.Intrinsic.empty())
+    if (Op.IntrinsicSelects.empty()) {
       continue;
-    OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName
-       << ", Intrinsic::" << Op.Intrinsic << ")\n";
+    }
+    for (const DXILIntrinsicSelect &MappedIntr : Op.IntrinsicSelects) {
+      OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName
+         << ", Intrinsic::" << MappedIntr.Intrinsic << ", ";
+      for (const DXILArgSelect &ArgSelect : MappedIntr.Args) {
+        OS << "(ArgSelect { ";
+        switch (ArgSelect.Type) {
+        case DXILArgSelect::Type::Index:
+          OS << "ArgSelect::Type::Index, ";
+          break;
+        case DXILArgSelect::Type::I8:
+          OS << "ArgSelect::Type::I8, ";
+          break;
+        case DXILArgSelect::Type::I32:
+          OS << "ArgSelect::Type::I32, ";
+          break;
+        }
+        OS << ArgSelect.Value << "}), ";
+      }
+      OS << ")\n";
+    }
   }
   OS << "\n";
   OS << "#undef DXIL_OP_INTRINSIC\n";

``````````

</details>


https://github.com/llvm/llvm-project/pull/114349


More information about the llvm-commits mailing list