[llvm] [DirectX] Implement the DXILCBufferAccess pass (PR #134571)

Wed Apr 9 19:47:06 PDT 2025

================
@@ -0,0 +1,121 @@
+; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
+
+; cbuffer CB : register(b0) {
+;   float a1[3];
+;   double3 a2[2];
+;   float16_t a3[2][2];
+;   uint64_t a4[3];
+;   int4 a5[2][3][4];
+;   uint16_t a6[1];
+;   int64_t a7[2];
+;   bool a8[4];
+; }
+%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }>
+%struct.S = type { float, <3 x double>, half, i64, <4 x i32>, i16, i64, i32, [12 x i8] }
+
+@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) poison
+@a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
+@a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32
+@a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2
+@a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8
+@a5 = external local_unnamed_addr addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16
+@a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2
+@a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8
+@a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4
+
+define void @f(ptr %dst) {
+entry:
+  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+  ; CHECK: store float [[X]], ptr %dst
+  %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4
----------------
bogner wrote:

I somehow missed this comment when I went through your feedback earlier. Just to close the loop - I based these tests on the output of `clang -mllvm -stop-before=dxil-cbuffer-access`, so while clang does usually generate typed GEPs, it seems they're often flattened into i8 GEPs somewhere else in the optimization pipeline.

https://github.com/llvm/llvm-project/pull/134571