[llvm] [DirectX] NonUniformResourceIndex lowering (PR #159608)

Helena Kotas via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 18 10:23:04 PDT 2025


https://github.com/hekota created https://github.com/llvm/llvm-project/pull/159608

Introduces `llvm.{dx|svp}.resource.nonuniformindex` intrinsic that will be used when a resource index is not guaranteed to be uniform across threads (HLSL function NonUniformResourceIndex).

The DXIL lowering layer looks for this intrinsic call in the resource index calculation, makes sure it is reflected in the NonUniform flag on DXIL create handle ops (`dx.op.createHandle` and `dx.op.createHandleFromBinding`), and then removes it from the module.

Closes #155701

>From 987b9e121700fb578db4cbb8112f932628c9fa42 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Thu, 18 Sep 2025 10:22:06 -0700
Subject: [PATCH] [DirectX] NonUniformResourceIndex lowering

Introduces `llvm.{dx|svp}.resource.nonuniformindex` intrinsic that will be used when a resource index is not guaranteed to be uniform across threads (HLSL function NonUniformResourceIndex).

The DXIL lowering layer looks for this intrinsic call in the resource index calculation, makes sure it is reflected in the NonUniform flag on DXIL create handle ops (`dx.op.createHandle` and `dx.op.createHandleFromBinding`), and then removes it from the module.

Closes #155701
---
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |  4 ++
 llvm/include/llvm/IR/IntrinsicsSPIRV.td       |  3 +
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    | 68 ++++++++++++++++---
 .../test/CodeGen/DirectX/CreateHandle-NURI.ll | 44 ++++++++++++
 .../DirectX/CreateHandleFromBinding-NURI.ll   | 46 +++++++++++++
 5 files changed, 155 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll
 create mode 100644 llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 5d76c3f8df89d..65f199bbe90dc 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -39,6 +39,10 @@ def int_dx_resource_handlefromimplicitbinding
 def int_dx_resource_getpointer
     : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty],
                             [IntrNoMem]>;
+
+def int_dx_resource_nonuniformindex
+    : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+
 def int_dx_resource_load_typedbuffer
     : DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
                             [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index bc026fa33c769..0b8258be8e1f3 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -160,6 +160,9 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
       : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty],
                               [IntrNoMem]>;
 
+def int_spv_resource_nonuniformindex
+      : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+
   // Read a value from the image buffer. It does not translate directly to a
   // single OpImageRead because the result type is not necessarily a 4 element
   // vector.
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 577b4624458b9..610d8b63bba27 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/DXILMetadataAnalysis.h"
 #include "llvm/Analysis/DXILResource.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
@@ -24,6 +25,7 @@
 #include "llvm/IR/IntrinsicsDirectX.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/Use.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -42,6 +44,7 @@ class OpLowerer {
   DXILResourceTypeMap &DRTM;
   const ModuleMetadataInfo &MMDI;
   SmallVector<CallInst *> CleanupCasts;
+  Function *CleanupNURI = nullptr;
 
 public:
   OpLowerer(Module &M, DXILResourceMap &DRM, DXILResourceTypeMap &DRTM,
@@ -195,6 +198,21 @@ class OpLowerer {
     CleanupCasts.clear();
   }
 
+  void cleanupNonUniformResourceIndexCalls() {
+    // Replace all NonUniformResourceIndex calls with their argument.
+    if (!CleanupNURI)
+      return;
+    for (User *U : make_early_inc_range(CleanupNURI->users())) {
+      CallInst *CI = dyn_cast<CallInst>(U);
+      if (!CI)
+        continue;
+      CI->replaceAllUsesWith(CI->getArgOperand(0));
+      CI->eraseFromParent();
+    }
+    CleanupNURI->eraseFromParent();
+    CleanupNURI = nullptr;
+  }
+
   // Remove the resource global associated with the handleFromBinding call
   // instruction and their uses as they aren't needed anymore.
   // TODO: We should verify that all the globals get removed.
@@ -229,6 +247,31 @@ class OpLowerer {
       NameGlobal->removeFromParent();
   }
 
+  bool hasNonUniformIndex(Value *IndexOp) {
+    if (isa<llvm::Constant>(IndexOp))
+      return false;
+
+    SmallVector<Value *> WorkList;
+    WorkList.push_back(IndexOp);
+
+    while (!WorkList.empty()) {
+      Value *V = WorkList.pop_back_val();
+      if (auto *CI = dyn_cast<CallInst>(V)) {
+        if (CI->getCalledFunction()->getIntrinsicID() ==
+            Intrinsic::dx_resource_nonuniformindex)
+          return true;
+      }
+      if (auto *U = llvm::dyn_cast<llvm::User>(V)) {
+        for (llvm::Value *Op : U->operands()) {
+          if (isa<llvm::Constant>(Op))
+            continue;
+          WorkList.push_back(Op);
+        }
+      }
+    }
+    return false;
+  }
+
   [[nodiscard]] bool lowerToCreateHandle(Function &F) {
     IRBuilder<> &IRB = OpBuilder.getIRB();
     Type *Int8Ty = IRB.getInt8Ty();
@@ -250,13 +293,12 @@ class OpLowerer {
         IndexOp = IRB.CreateAdd(IndexOp,
                                 ConstantInt::get(Int32Ty, Binding.LowerBound));
 
-      // FIXME: The last argument is a NonUniform flag which needs to be set
-      // based on resource analysis.
-      // https://github.com/llvm/llvm-project/issues/155701
+      bool HasNonUniformIndex =
+          (Binding.Size == 1) ? false : hasNonUniformIndex(IndexOp);
       std::array<Value *, 4> Args{
           ConstantInt::get(Int8Ty, llvm::to_underlying(RC)),
           ConstantInt::get(Int32Ty, Binding.RecordID), IndexOp,
-          ConstantInt::get(Int1Ty, false)};
+          ConstantInt::get(Int1Ty, HasNonUniformIndex)};
       Expected<CallInst *> OpCall =
           OpBuilder.tryCreateOp(OpCode::CreateHandle, Args, CI->getName());
       if (Error E = OpCall.takeError())
@@ -300,11 +342,10 @@ class OpLowerer {
                                 : Binding.LowerBound + Binding.Size - 1;
       Constant *ResBind = OpBuilder.getResBind(Binding.LowerBound, UpperBound,
                                                Binding.Space, RC);
-      // FIXME: The last argument is a NonUniform flag which needs to be set
-      // based on resource analysis.
-      // https://github.com/llvm/llvm-project/issues/155701
-      Constant *NonUniform = ConstantInt::get(Int1Ty, false);
-      std::array<Value *, 3> BindArgs{ResBind, IndexOp, NonUniform};
+      bool NonUniformIndex =
+          (Binding.Size == 1) ? false : hasNonUniformIndex(IndexOp);
+      Constant *NonUniformOp = ConstantInt::get(Int1Ty, NonUniformIndex);
+      std::array<Value *, 3> BindArgs{ResBind, IndexOp, NonUniformOp};
       Expected<CallInst *> OpBind = OpBuilder.tryCreateOp(
           OpCode::CreateHandleFromBinding, BindArgs, CI->getName());
       if (Error E = OpBind.takeError())
@@ -868,6 +909,11 @@ class OpLowerer {
       case Intrinsic::dx_resource_getpointer:
         HasErrors |= lowerGetPointer(F);
         break;
+      case Intrinsic::dx_resource_nonuniformindex:
+        assert(!CleanupNURI &&
+               "overloaded llvm.dx.resource.nonuniformindex intrinsics?");
+        CleanupNURI = &F;
+        break;
       case Intrinsic::dx_resource_load_typedbuffer:
         HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true);
         break;
@@ -908,8 +954,10 @@ class OpLowerer {
       }
       Updated = true;
     }
-    if (Updated && !HasErrors)
+    if (Updated && !HasErrors) {
       cleanupHandleCasts();
+      cleanupNonUniformResourceIndexCalls();
+    }
 
     return Updated;
   }
diff --git a/llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll b/llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll
new file mode 100644
index 0000000000000..f981be6570cf8
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll
@@ -0,0 +1,44 @@
+; RUN: opt -S -passes=dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.0-compute"
+
+ at A.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1
+
+declare i32 @some_val();
+
+define void @test_buffers_with_nuri() {
+
+  %val = call i32 @some_val()
+
+  ; RWBuffer<float> A[10];
+  ;
+  ; A[NonUniformResourceIndex(val)];
+
+  %nuri1 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
+  %res1 = call target("dx.TypedBuffer", float, 1, 0, 0) 
+            @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %nuri1, ptr @A.str)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %val, i1 true) #[[ATTR:.*]]
+  ; CHECK-NOT: @llvm.dx.cast.handle
+  ; CHECK-NOT: @llvm.dx.resource.nonuniformindex
+
+  ; A[NonUniformResourceIndex(val + 1) % 10];
+  %add1 = add i32 %val, 1
+  %nuri2 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %add1)
+  %rem1 = urem i32 %nuri2, 10
+  %res2 = call target("dx.TypedBuffer", float, 1, 0, 0) 
+           @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %rem1, ptr @A.str)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %rem1, i1 true) #[[ATTR]]
+
+  ; A[10 + 3 * NonUniformResourceIndex(GI)];
+  %mul1 = mul i32 %nuri1, 3
+  %add2 = add i32 %mul1, 10
+  %res3 = call target("dx.TypedBuffer", float, 1, 0, 0)
+           @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %add2, ptr @A.str)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %add2, i1 true) #[[ATTR]]
+
+  ret void
+}
+
+; CHECK: attributes #[[ATTR]] = {{{.*}} memory(read) {{.*}}}
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
diff --git a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll
new file mode 100644
index 0000000000000..6d37652acccf0
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll
@@ -0,0 +1,46 @@
+; RUN: opt -S -passes=dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+ at A.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1
+
+declare i32 @some_val();
+
+define void @test_buffers_with_nuri() {
+
+  %val = call i32 @some_val()
+
+  ; RWBuffer<float> A[10];
+  ;
+  ; A[NonUniformResourceIndex(val)];
+
+  %nuri1 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
+  %res1 = call target("dx.TypedBuffer", float, 1, 0, 0) 
+            @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %nuri1, ptr @A.str)
+  ; CHECK: %[[RES1:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %val, i1 true) #[[ATTR:.*]]
+  ; CHECK: call  %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES1]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]]
+  ; CHECK-NOT: @llvm.dx.cast.handle
+  ; CHECK-NOT: @llvm.dx.resource.nonuniformindex
+
+  ; A[NonUniformResourceIndex(val + 1) % 10];
+  %add1 = add i32 %val, 1
+  %nuri2 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %add1)
+  %rem1 = urem i32 %nuri2, 10
+  %res2 = call target("dx.TypedBuffer", float, 1, 0, 0) 
+           @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %rem1, ptr @A.str)
+  ; CHECK: %[[RES2:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %rem1, i1 true) #[[ATTR]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES2]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]]
+
+  ; A[10 + 3 * NonUniformResourceIndex(GI)];
+  %mul1 = mul i32 %nuri1, 3
+  %add2 = add i32 %mul1, 10
+  %res3 = call target("dx.TypedBuffer", float, 1, 0, 0)
+           @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %add2, ptr @A.str)
+  ; CHECK: %[[RES3:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %add2, i1 true) #[[ATTR]]
+  ; CHECK: %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES3]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]]
+  ret void
+}
+
+; CHECK: attributes #[[ATTR]] = {{{.*}} memory(none) {{.*}}}
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }



More information about the llvm-commits mailing list