[llvm] [DirectX] Implement the DXILCBufferAccess pass (PR #134571)

Justin Bogner via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 15 22:29:11 PDT 2025


https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/134571

>From 5bbc471bbffb5f6461481720dfff09347c3fcdb1 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Mon, 3 Mar 2025 12:04:30 -0700
Subject: [PATCH 1/3] [DirectX] Implement the DXILCBufferAccess pass

This introduces a pass that walks accesses to globals in cbuffers and
replaces them with accesses via the cbuffer handle itself. The logic to
interpret the cbuffer metadata is kept in `lib/Frontend/HLSL` so that it
can be reused by other consumers of that metadata.

Fixes #124630.
---
 llvm/include/llvm/Frontend/HLSL/CBuffer.h     |  64 ++++++
 llvm/lib/Frontend/HLSL/CBuffer.cpp            |  71 ++++++
 llvm/lib/Frontend/HLSL/CMakeLists.txt         |   1 +
 llvm/lib/Target/DirectX/CMakeLists.txt        |   1 +
 llvm/lib/Target/DirectX/DXILCBufferAccess.cpp | 209 ++++++++++++++++++
 llvm/lib/Target/DirectX/DXILCBufferAccess.h   |  28 +++
 llvm/lib/Target/DirectX/DirectX.h             |   6 +
 .../Target/DirectX/DirectXPassRegistry.def    |   1 +
 .../Target/DirectX/DirectXTargetMachine.cpp   |   3 +
 .../CodeGen/DirectX/CBufferAccess/arrays.ll   | 121 ++++++++++
 .../CodeGen/DirectX/CBufferAccess/float.ll    |  22 ++
 .../DirectX/CBufferAccess/gep-ce-two-uses.ll  |  32 +++
 .../CodeGen/DirectX/CBufferAccess/scalars.ll  | 105 +++++++++
 .../CodeGen/DirectX/CBufferAccess/vectors.ll  | 116 ++++++++++
 llvm/test/CodeGen/DirectX/llc-pipeline.ll     |   1 +
 15 files changed, 781 insertions(+)
 create mode 100644 llvm/include/llvm/Frontend/HLSL/CBuffer.h
 create mode 100644 llvm/lib/Frontend/HLSL/CBuffer.cpp
 create mode 100644 llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
 create mode 100644 llvm/lib/Target/DirectX/DXILCBufferAccess.h
 create mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
 create mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
 create mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
 create mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
 create mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll

diff --git a/llvm/include/llvm/Frontend/HLSL/CBuffer.h b/llvm/include/llvm/Frontend/HLSL/CBuffer.h
new file mode 100644
index 0000000000000..cf45b5ff5a8e2
--- /dev/null
+++ b/llvm/include/llvm/Frontend/HLSL/CBuffer.h
@@ -0,0 +1,64 @@
+//===- CBuffer.h - HLSL constant buffer handling ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains utilities to work with constant buffers in HLSL.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FRONTEND_HLSL_CBUFFER_H
+#define LLVM_FRONTEND_HLSL_CBUFFER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include <optional>
+
+namespace llvm {
+class Module;
+class GlobalVariable;
+class NamedMDNode;
+
+namespace hlsl {
+
+/// A single global variable that belongs to a cbuffer, paired with the offset
+/// recorded for it in the cbuffer's layout type.
+struct CBufferMember {
+  CBufferMember(GlobalVariable *GV, size_t Offset) : GV(GV), Offset(Offset) {}
+
+  /// The global variable that stands in for this member.
+  GlobalVariable *GV;
+  /// Offset of the member within the cbuffer, as read from the layout type.
+  size_t Offset;
+};
+
+/// Associates a cbuffer handle global with the member globals listed alongside
+/// it in the cbuffer metadata.
+struct CBufferMapping {
+  CBufferMapping(GlobalVariable *Handle) : Handle(Handle) {}
+
+  /// Global variable holding the cbuffer resource handle.
+  GlobalVariable *Handle;
+  /// Members of this cbuffer; members whose metadata operand is null are
+  /// omitted.
+  SmallVector<CBufferMember> Members;
+};
+
+/// Parsed view of a module's `hlsl.cbs` named metadata: one CBufferMapping per
+/// metadata operand.
+class CBufferMetadata {
+  /// The named metadata node this object was built from.
+  NamedMDNode *MD;
+  SmallVector<CBufferMapping> Mappings;
+
+  CBufferMetadata(NamedMDNode *MD) : MD(MD) {}
+
+public:
+  /// Reads the cbuffer metadata from \p M. Returns std::nullopt if the module
+  /// has no `hlsl.cbs` named metadata.
+  static std::optional<CBufferMetadata> get(Module &M);
+
+  using iterator = SmallVector<CBufferMapping>::iterator;
+  iterator begin() { return Mappings.begin(); }
+  iterator end() { return Mappings.end(); }
+
+  /// Erases the underlying named metadata node from its module.
+  void eraseFromModule();
+};
+
+/// Translates \p Offset, a byte offset into a global of array type \p Ty, into
+/// the corresponding offset in the cbuffer, where each array element's size is
+/// rounded up to a multiple of 16 bytes.
+APInt translateCBufArrayOffset(const DataLayout &DL, APInt Offset,
+                               ArrayType *Ty);
+
+} // namespace hlsl
+} // namespace llvm
+
+#endif // LLVM_FRONTEND_HLSL_CBUFFER_H
diff --git a/llvm/lib/Frontend/HLSL/CBuffer.cpp b/llvm/lib/Frontend/HLSL/CBuffer.cpp
new file mode 100644
index 0000000000000..b311f6aea9636
--- /dev/null
+++ b/llvm/lib/Frontend/HLSL/CBuffer.cpp
@@ -0,0 +1,71 @@
+//===- CBuffer.cpp - HLSL constant buffer handling ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Frontend/HLSL/CBuffer.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+using namespace llvm::hlsl;
+
+/// Look up the offset recorded for the member at position \p Index in the
+/// layout type wrapped by the cbuffer handle's type.
+static size_t getMemberOffset(GlobalVariable *Handle, size_t Index) {
+  auto *CBufTy = cast<TargetExtType>(Handle->getValueType());
+  assert(CBufTy->getName().ends_with(".CBuffer") && "Not a cbuffer type");
+  assert(CBufTy->getNumTypeParameters() == 1 && "Expected layout type");
+
+  auto *Layout = cast<TargetExtType>(CBufTy->getTypeParameter(0));
+  assert(Layout->getName().ends_with(".Layout") && "Not a layout type");
+
+  // The first integer parameter is the "size"; member offsets follow it, so
+  // the member at `Index` lives one slot further along.
+  const size_t OffsetSlot = 1 + Index;
+  assert(OffsetSlot < Layout->getNumIntParameters() &&
+         "Not enough parameters");
+
+  return Layout->getIntParameter(OffsetSlot);
+}
+
+/// Build a CBufferMetadata from the module's `hlsl.cbs` named metadata, or
+/// return std::nullopt when the module has none.
+std::optional<CBufferMetadata> CBufferMetadata::get(Module &M) {
+  NamedMDNode *NamedMD = M.getNamedMetadata("hlsl.cbs");
+  if (NamedMD == nullptr)
+    return std::nullopt;
+
+  std::optional<CBufferMetadata> Parsed({NamedMD});
+
+  // Each operand describes one cbuffer: operand 0 is the handle global, and
+  // operand I (I >= 1) is the member whose offset is layout entry I - 1.
+  for (const MDNode *Node : NamedMD->operands()) {
+    const unsigned NumOps = Node->getNumOperands();
+    assert(NumOps && "Invalid cbuffer metadata");
+
+    auto *HandleGV = cast<GlobalVariable>(
+        cast<ValueAsMetadata>(Node->getOperand(0))->getValue());
+    CBufferMapping &Mapping = Parsed->Mappings.emplace_back(HandleGV);
+
+    for (unsigned OpIdx = 1; OpIdx < NumOps; ++OpIdx) {
+      Metadata *MemberMD = Node->getOperand(OpIdx);
+      // A null operand marks a member that has been optimized out; it gets no
+      // entry in the mapping.
+      if (MemberMD) {
+        auto *MemberGV = cast<GlobalVariable>(
+            cast<ValueAsMetadata>(MemberMD)->getValue());
+        Mapping.Members.emplace_back(MemberGV,
+                                     getMemberOffset(HandleGV, OpIdx - 1));
+      }
+    }
+  }
+
+  return Parsed;
+}
+
+
+/// Erases the named metadata node this object was created from.
+void CBufferMetadata::eraseFromModule() {
+  // Remove the cbs named metadata
+  MD->eraseFromParent();
+}
+
+/// Translate a byte offset into a global of array type \p Ty into the matching
+/// byte offset in the cbuffer, where each array element's size is rounded up
+/// to a multiple of 16 bytes.
+///
+/// Only the element index is translated; any remainder of \p Offset within an
+/// element is dropped.
+APInt hlsl::translateCBufArrayOffset(const DataLayout &DL, APInt Offset,
+                                     ArrayType *Ty) {
+  Type *EltTy = Ty->getElementType();
+  // Offsets accumulated from GEPs advance by the element's alloc size, which
+  // includes alignment padding, so that is the stride to divide by when
+  // recovering the element index. Dividing by the unpadded size miscounts the
+  // index for element types such as <3 x double>, whose alloc size is larger
+  // than their size in bits.
+  uint64_t Stride = DL.getTypeAllocSize(EltTy).getFixedValue();
+  // In the cbuffer each element occupies its size rounded up to whole rows.
+  uint64_t EltSize = DL.getTypeSizeInBits(EltTy) / 8;
+  uint64_t RoundUp = alignTo(EltSize, Align(16));
+  return Offset.udiv(Stride) * RoundUp;
+}
diff --git a/llvm/lib/Frontend/HLSL/CMakeLists.txt b/llvm/lib/Frontend/HLSL/CMakeLists.txt
index eda6cb8e69a49..07a0c845ceef6 100644
--- a/llvm/lib/Frontend/HLSL/CMakeLists.txt
+++ b/llvm/lib/Frontend/HLSL/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_component_library(LLVMFrontendHLSL
+  CBuffer.cpp
   HLSLResource.cpp
 
   ADDITIONAL_HEADER_DIRS
diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt
index 13f8adbe4f132..c55028bc75dd6 100644
--- a/llvm/lib/Target/DirectX/CMakeLists.txt
+++ b/llvm/lib/Target/DirectX/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_target(DirectXCodeGen
   DirectXTargetMachine.cpp
   DirectXTargetTransformInfo.cpp
   DXContainerGlobals.cpp
+  DXILCBufferAccess.cpp
   DXILDataScalarization.cpp
   DXILFinalizeLinkage.cpp
   DXILFlattenArrays.cpp
diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
new file mode 100644
index 0000000000000..f8771efeac991
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
@@ -0,0 +1,209 @@
+//===- DXILCBufferAccess.cpp - Translate CBuffer Loads --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DXILCBufferAccess.h"
+#include "DirectX.h"
+#include "llvm/Frontend/HLSL/CBuffer.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+#define DEBUG_TYPE "dxil-cbuffer-access"
+using namespace llvm;
+
+namespace {
+/// Helper for building a `load.cbufferrow` intrinsic given a simple type.
+///
+/// Selects the intrinsic variant and its struct return type from the bit width
+/// of the scalar type: 16-bit types use the 8-element row, 32-bit types the
+/// 4-element row, and 64-bit types the 2-element row.
+struct CBufferRowIntrin {
+  /// Intrinsic to call for this element type.
+  Intrinsic::ID IID = Intrinsic::not_intrinsic;
+  /// Struct type returned by the intrinsic: NumElts fields of the scalar type.
+  Type *RetTy = nullptr;
+  /// Size of one element in bytes.
+  unsigned int EltSize = 0;
+  /// Number of elements in one 16-byte row.
+  unsigned int NumElts = 0;
+
+  CBufferRowIntrin(const DataLayout &DL, Type *Ty) {
+    assert(Ty == Ty->getScalarType() && "Expected scalar type");
+
+    switch (DL.getTypeSizeInBits(Ty)) {
+    case 16:
+      IID = Intrinsic::dx_resource_load_cbufferrow_8;
+      RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty);
+      EltSize = 2;
+      NumElts = 8;
+      break;
+    case 32:
+      IID = Intrinsic::dx_resource_load_cbufferrow_4;
+      RetTy = StructType::get(Ty, Ty, Ty, Ty);
+      EltSize = 4;
+      NumElts = 4;
+      break;
+    case 64:
+      IID = Intrinsic::dx_resource_load_cbufferrow_2;
+      RetTy = StructType::get(Ty, Ty);
+      EltSize = 8;
+      NumElts = 2;
+      break;
+    default:
+      llvm_unreachable("Only 16, 32, and 64 bit types supported");
+    }
+  }
+};
+} // namespace
+
+/// Compute the offset, relative to the start of \p Global, addressed by
+/// \p GEP, which must be a constant-offset GEP directly on \p Global. For
+/// array globals the offset is translated to the cbuffer's array layout.
+static size_t getOffsetForCBufferGEP(GEPOperator *GEP, GlobalVariable *Global,
+                                     const DataLayout &DL) {
+  // Offsets are always constant, so there can only be a single level of GEP
+  // between the access and the global itself.
+  assert(GEP->getPointerOperand() == Global &&
+         "Indirect access to resource handle");
+
+  const unsigned IndexWidth = DL.getIndexTypeSizeInBits(GEP->getType());
+  APInt ByteOffset(IndexWidth, 0);
+  [[maybe_unused]] const bool IsConstant =
+      GEP->accumulateConstantOffset(DL, ByteOffset);
+  assert(IsConstant && "Offsets into cbuffer globals must be constant");
+
+  // Non-array globals use the accumulated offset as-is.
+  auto *ArrayTy = dyn_cast<ArrayType>(Global->getValueType());
+  if (!ArrayTy)
+    return ByteOffset.getZExtValue();
+  return hlsl::translateCBufArrayOffset(DL, ByteOffset, ArrayTy)
+      .getZExtValue();
+}
+
+/// Replace access via cbuffer global with a load from the cbuffer handle
+/// itself.
+///
+/// \param LI         Load whose pointer operand is \p Global or a GEP on it.
+/// \param Global     The cbuffer member global being read.
+/// \param HandleGV   Global that holds the cbuffer handle.
+/// \param BaseOffset Offset of \p Global within the cbuffer, in bytes.
+/// \param DeadInsts  Receives \p LI once its uses have been replaced.
+static void replaceAccess(LoadInst *LI, GlobalVariable *Global,
+                          GlobalVariable *HandleGV, size_t BaseOffset,
+                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+  const DataLayout &DL = HandleGV->getDataLayout();
+
+  // Start from the member's offset and add the offset of any GEP between the
+  // load and the global.
+  size_t Offset = BaseOffset;
+  if (auto *GEP = dyn_cast<GEPOperator>(LI->getPointerOperand()))
+    Offset += getOffsetForCBufferGEP(GEP, Global, DL);
+  else if (LI->getPointerOperand() != Global)
+    llvm_unreachable("Load instruction doesn't reference cbuffer global");
+
+  // All new instructions are inserted immediately before the original load.
+  IRBuilder<> Builder(LI);
+  auto *Handle = Builder.CreateLoad(HandleGV->getValueType(), HandleGV,
+                                    HandleGV->getName());
+
+  Type *Ty = LI->getType();
+  CBufferRowIntrin Intrin(DL, Ty->getScalarType());
+  // The cbuffer consists of some number of 16-byte rows.
+  unsigned int CurrentRow = Offset / 16;
+  // Position of the first loaded element within that row.
+  unsigned int CurrentIndex = (Offset % 16) / Intrin.EltSize;
+
+  auto *CBufLoad = Builder.CreateIntrinsic(
+      Intrin.RetTy, Intrin.IID,
+      {Handle, ConstantInt::get(Builder.getInt32Ty(), CurrentRow)}, nullptr,
+      LI->getName());
+  auto *Elt =
+      Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, LI->getName());
+
+  Value *Result = nullptr;
+  // Number of elements still to extract after the first one.
+  unsigned int Remaining =
+      ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
+  if (Remaining == 0) {
+    // We only have a single element, so we're done.
+    Result = Elt;
+
+    // However, if we loaded a <1 x T>, then we need to adjust the type here.
+    if (auto *VT = dyn_cast<FixedVectorType>(LI->getType()))
+      if (VT->getNumElements() == 1)
+        Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
+                                             Builder.getInt32(0));
+  } else {
+    // Walk each element and extract it, wrapping to new rows as needed.
+    SmallVector<Value *> Extracts{Elt};
+    while (Remaining--) {
+      CurrentIndex %= Intrin.NumElts;
+
+      // Index wrapped to 0: the next element lives in the following row, so
+      // emit a load for it.
+      if (CurrentIndex == 0)
+        CBufLoad = Builder.CreateIntrinsic(
+            Intrin.RetTy, Intrin.IID,
+            {Handle, ConstantInt::get(Builder.getInt32Ty(), ++CurrentRow)},
+            nullptr, LI->getName());
+
+      Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
+                                                    LI->getName()));
+    }
+
+    // Finally, we build up the original loaded value.
+    Result = PoisonValue::get(Ty);
+    for (int I = 0, E = Extracts.size(); I < E; ++I)
+      Result =
+          Builder.CreateInsertElement(Result, Extracts[I], Builder.getInt32(I));
+  }
+
+  // The original load is left in place here and only queued for deletion.
+  LI->replaceAllUsesWith(Result);
+  DeadInsts.push_back(LI);
+}
+
+/// Rewrite every load reachable through the users of \p Global so that it
+/// reads from the cbuffer handle in \p HandleGV instead, then delete the
+/// instructions that became dead.
+static void replaceAccessesWithHandle(GlobalVariable *Global,
+                                      GlobalVariable *HandleGV,
+                                      size_t BaseOffset) {
+  SmallVector<WeakTrackingVH> DeadInsts;
+
+  // Depth-first walk over the use graph rooted at the global.
+  SmallVector<User *> Worklist{Global->users()};
+  while (!Worklist.empty()) {
+    User *U = Worklist.pop_back_val();
+
+    auto *Load = dyn_cast<LoadInst>(U);
+    if (!Load) {
+      // Not a load yet: keep following users until one is found.
+      Worklist.append(U->user_begin(), U->user_end());
+      continue;
+    }
+
+    // Loads are rewritten in place; their own users are not visited.
+    replaceAccess(Load, Global, HandleGV, BaseOffset, DeadInsts);
+  }
+
+  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+}
+
+/// Rewrite all accesses to cbuffer member globals in \p M as loads through the
+/// owning cbuffer handle, then remove the member globals and the cbuffer
+/// metadata.
+///
+/// \returns false if the module has no cbuffer metadata, true otherwise.
+static bool replaceCBufferAccesses(Module &M) {
+  std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(M);
+  if (!CBufMD)
+    return false;
+
+  for (const hlsl::CBufferMapping &Mapping : *CBufMD)
+    for (const hlsl::CBufferMember &Member : Mapping.Members) {
+      replaceAccessesWithHandle(Member.GV, Mapping.Handle, Member.Offset);
+
+      // Constant GEP expressions that fed the rewritten loads are still
+      // registered as users of the global; drop the ones that are now dead.
+      Member.GV->removeDeadConstantUsers();
+      if (Member.GV->use_empty()) {
+        // Nothing refers to the global any more, so destroy it. Merely
+        // unlinking it from the module would leak the object.
+        Member.GV->eraseFromParent();
+      } else {
+        // Users other than the rewritten loads remain; only unlink the global
+        // from the module, as before.
+        Member.GV->removeFromParent();
+      }
+    }
+
+  CBufMD->eraseFromModule();
+  return true;
+}
+
+/// New pass manager entry point: preserves every analysis when the module was
+/// left untouched and none otherwise.
+PreservedAnalyses DXILCBufferAccess::run(Module &M, ModuleAnalysisManager &AM) {
+  if (replaceCBufferAccesses(M))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+namespace {
+/// Legacy pass manager wrapper around replaceCBufferAccesses().
+class DXILCBufferAccessLegacy : public ModulePass {
+public:
+  static char ID; // Pass identification.
+
+  DXILCBufferAccessLegacy() : ModulePass(ID) {}
+
+  StringRef getPassName() const override { return "DXIL CBuffer Access"; }
+
+  /// Returns whether the module was modified.
+  bool runOnModule(Module &M) override {
+    const bool Changed = replaceCBufferAccesses(M);
+    return Changed;
+  }
+};
+char DXILCBufferAccessLegacy::ID = 0;
+} // end anonymous namespace
+
+INITIALIZE_PASS(DXILCBufferAccessLegacy, DEBUG_TYPE, "DXIL CBuffer Access",
+                false, false)
+
+/// Factory for the legacy pass manager version of the pass; returns a newly
+/// allocated DXILCBufferAccessLegacy.
+ModulePass *llvm::createDXILCBufferAccessLegacyPass() {
+  return new DXILCBufferAccessLegacy();
+}
diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.h b/llvm/lib/Target/DirectX/DXILCBufferAccess.h
new file mode 100644
index 0000000000000..6c1cde164004e
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.h
@@ -0,0 +1,28 @@
+//===- DXILCBufferAccess.h - Translate CBuffer Loads ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file Pass for replacing loads from cbuffers in the cbuffer address space
+// with cbuffer load intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_DIRECTX_DXILCBUFFERACCESS_H
+#define LLVM_LIB_TARGET_DIRECTX_DXILCBUFFERACCESS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// New pass manager pass that replaces loads from cbuffer globals with cbuffer
+/// load intrinsics.
+class DXILCBufferAccess : public PassInfoMixin<DXILCBufferAccess> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_DIRECTX_DXILCBUFFERACCESS_H
diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h
index 96a8a08c875f8..c0eb221d12203 100644
--- a/llvm/lib/Target/DirectX/DirectX.h
+++ b/llvm/lib/Target/DirectX/DirectX.h
@@ -35,6 +35,12 @@ void initializeDXILIntrinsicExpansionLegacyPass(PassRegistry &);
 /// Pass to expand intrinsic operations that lack DXIL opCodes
 ModulePass *createDXILIntrinsicExpansionLegacyPass();
 
+/// Initializer for DXIL CBuffer Access Pass
+void initializeDXILCBufferAccessLegacyPass(PassRegistry &);
+
+/// Pass to translate loads in the cbuffer address space to intrinsics
+ModulePass *createDXILCBufferAccessLegacyPass();
+
 /// Initializer for DXIL Data Scalarization Pass
 void initializeDXILDataScalarizationLegacyPass(PassRegistry &);
 
diff --git a/llvm/lib/Target/DirectX/DirectXPassRegistry.def b/llvm/lib/Target/DirectX/DirectXPassRegistry.def
index 87d91ead1896f..37093f16680a9 100644
--- a/llvm/lib/Target/DirectX/DirectXPassRegistry.def
+++ b/llvm/lib/Target/DirectX/DirectXPassRegistry.def
@@ -23,6 +23,7 @@ MODULE_ANALYSIS("dxil-root-signature-analysis", dxil::RootSignatureAnalysis())
 #ifndef MODULE_PASS
 #define MODULE_PASS(NAME, CREATE_PASS)
 #endif
+MODULE_PASS("dxil-cbuffer-access", DXILCBufferAccess())
 MODULE_PASS("dxil-data-scalarization", DXILDataScalarization())
 MODULE_PASS("dxil-flatten-arrays", DXILFlattenArrays())
 MODULE_PASS("dxil-intrinsic-expansion", DXILIntrinsicExpansion())
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 747e4b3eb9411..41f6f37a41f9d 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "DirectXTargetMachine.h"
+#include "DXILCBufferAccess.h"
 #include "DXILDataScalarization.h"
 #include "DXILFlattenArrays.h"
 #include "DXILIntrinsicExpansion.h"
@@ -65,6 +66,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
   initializeRootSignatureAnalysisWrapperPass(*PR);
   initializeDXILFinalizeLinkageLegacyPass(*PR);
   initializeDXILPrettyPrinterLegacyPass(*PR);
+  initializeDXILCBufferAccessLegacyPass(*PR);
 }
 
 class DXILTargetObjectFile : public TargetLoweringObjectFile {
@@ -96,6 +98,7 @@ class DirectXPassConfig : public TargetPassConfig {
   void addCodeGenPrepare() override {
     addPass(createDXILFinalizeLinkageLegacyPass());
     addPass(createDXILIntrinsicExpansionLegacyPass());
+    addPass(createDXILCBufferAccessLegacyPass());
     addPass(createDXILDataScalarizationLegacyPass());
     addPass(createDXILFlattenArraysLegacyPass());
     addPass(createDXILResourceAccessLegacyPass());
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
new file mode 100644
index 0000000000000..7478cc5f362dc
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
@@ -0,0 +1,121 @@
+; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
+
+; cbuffer CB : register(b0) {
+;   float a1[3];
+;   double3 a2[2];
+;   float16_t a3[2][2];
+;   uint64_t a4[3];
+;   int4 a5[2][3][4];
+;   uint16_t a6[1];
+;   int64_t a7[2];
+;   bool a8[4];
+; }
+%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }>
+%struct.S = type { float, <3 x double>, half, i64, <4 x i32>, i16, i64, i32, [12 x i8] }
+
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) poison
+ at a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
+ at a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32
+ at a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2
+ at a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8
+ at a5 = external local_unnamed_addr addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16
+ at a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2
+ at a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8
+ at a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4
+
+; Loads one element from each array member of the cbuffer. For every load the
+; CHECK lines expect a fresh load of the handle from @CB.cb followed by
+; cbufferrow loads whose row index is the element's cbuffer byte offset
+; divided by 16.
+define void @f(ptr %dst) {
+entry:
+  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+
+  ; a1[1]: member offset 0 plus one 16-byte element -> byte 16, row 1.
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+  ; CHECK: store float [[X]], ptr %dst
+  %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4
+  store float %a1, ptr %dst, align 32
+
+  ; a2[1]: member offset 48 plus one 32-byte element -> byte 80, row 5; the
+  ; third double comes from row 6.
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
+  ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
+  ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
+  ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
+  ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
+  ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1
+  ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8
+  ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]]
+  %a2 = load <3 x double>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a2, i32 32), align 8
+  %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 8
+  store <3 x double> %a2, ptr %a2.i, align 32
+
+  ; Byte offset 6 into a3 (member offset 112); the expected load is row 8,
+  ; element 0.
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8)
+  ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 32
+  ; CHECK: store half [[X]], ptr [[PTR]]
+  %a3 = load half, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a3, i32 6), align 2
+  %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 32
+  store half %a3, ptr %a3.i, align 2
+
+  ; a4[1]: member offset 176 plus one 16-byte element -> byte 192, row 12.
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12)
+  ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40
+  ; CHECK: store i64 [[X]], ptr [[PTR]]
+  %a4 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a4, i32 8), align 8
+  %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 40
+  store i64 %a4, ptr %a4.i, align 8
+
+  ; Byte offset 272 into a5 (member offset 224); the expected load is row 26,
+  ; and all four elements come from that row.
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 26)
+  ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+  ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+  ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
+  ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
+  ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+  ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1
+  ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2
+  ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48
+  ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]]
+  %a5 = load <4 x i32>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a5, i32 272), align 4
+  %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 48
+  store <4 x i32> %a5, ptr %a5.i, align 4
+
+  ; a6[0]: loaded directly from the global; member offset 608 -> row 38.
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 38)
+  ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 64
+  ; CHECK: store i16 [[X]], ptr [[PTR]]
+  %a6 = load i16, ptr addrspace(2) @a6, align 2
+  %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 64
+  store i16 %a6, ptr %a6.i, align 2
+
+  ; a7[1]: member offset 624 plus one 16-byte element -> byte 640, row 40.
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 40)
+  ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72
+  ; CHECK: store i64 [[X]], ptr [[PTR]]
+  %a7 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a7, i32 8), align 8
+  %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 72
+  store i64 %a7, ptr %a7.i, align 8
+
+  ; a8[1]: member offset 656 plus one 16-byte element -> byte 672, row 42.
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 42)
+  ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 80
+  ; CHECK: store i32 [[X]], ptr [[PTR]]
+  %a8 = load i32, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a8, i32 4), align 4, !range !1, !noundef !2
+  %a8.i = getelementptr inbounds nuw i8, ptr %dst, i32 80
+  store i32 %a8, ptr %a8.i, align 4
+
+  ret void
+}
+
+!hlsl.cbs = !{!0}
+
+!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8}
+!1 = !{i32 0, i32 2}
+!2 = !{}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
new file mode 100644
index 0000000000000..8abbb4a831f1e
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
+
+%__cblayout_CB = type <{ float }>
+%struct.S = type { float }
+
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 4, 0)) poison
+ at x = external local_unnamed_addr addrspace(2) global float, align 4
+
+; Loads the only member of the cbuffer (offset 0) and checks that the load is
+; rewritten to a cbufferrow.4 load of row 0 followed by an extract of element 0.
+define void @f(ptr %dst) {
+entry:
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+  ; CHECK: store float [[X]], ptr %dst
+  %x = load float, ptr addrspace(2) @x, align 4
+  store float %x, ptr %dst, align 4
+  ret void
+}
+
+!hlsl.cbs = !{!0}
+
+!0 = !{ptr @CB.cb, ptr addrspace(2) @x}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
new file mode 100644
index 0000000000000..e9bcc7bbad067
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
+
+; cbuffer CB : register(b0) {
+;   float a1[3];
+; }
+%__cblayout_CB = type <{ [3 x float] }>
+
+ at CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) poison
+ at a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
+
+; Two loads go through identical constant GEP expressions on @a1. The CHECK
+; lines expect each load to be rewritten on its own: a handle load, a
+; cbufferrow.4 load of row 1, and an extract of element 0.
+define void @f(ptr %dst) {
+entry:
+  ; First use of the GEP constant expression.
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+  ; CHECK: store float [[X]], ptr %dst
+  %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4
+  store float %a1, ptr %dst, align 32
+
+  ; Second use of the same GEP constant expression.
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+  ; CHECK: store float [[X]], ptr %dst
+  %a2 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4
+  store float %a2, ptr %dst, align 32
+
+  ret void
+}
+
+!hlsl.cbs = !{!0}
+
+!0 = !{ptr @CB.cb, ptr addrspace(2) @a1}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
new file mode 100644
index 0000000000000..8305c1e177995
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
@@ -0,0 +1,105 @@
+; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
+
+; cbuffer CB {
+;   float a1;     // offset  0, size  4
+;   int a2;       // offset  4, size  4
+;   bool a3;      // offset  8, size  4
+;   float16_t a4; // offset 12, size  2
+;   uint16_t a5;  // offset 14, size  2
+;   double a6;    // offset 16, size  8
+;   int64_t a7;   // offset 24, size  8
+; }
+%__cblayout_CB = type <{ float, i32, i32, half, i16, double, i64 }>
+%struct.Scalars = type { float, i32, i32, half, i16, double, i64 }
+
+; CHECK: @CB.cb =
+@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) poison
+; CHECK-NOT: @a1 =
+@a1 = external local_unnamed_addr addrspace(2) global float, align 4
+; CHECK-NOT: @a2 =
+@a2 = external local_unnamed_addr addrspace(2) global i32, align 4
+; CHECK-NOT: @a3 =
+@a3 = external local_unnamed_addr addrspace(2) global i32, align 4
+; CHECK-NOT: @a4 =
+@a4 = external local_unnamed_addr addrspace(2) global half, align 2
+; CHECK-NOT: @a5 =
+@a5 = external local_unnamed_addr addrspace(2) global i16, align 2
+; CHECK-NOT: @a6 =
+@a6 = external local_unnamed_addr addrspace(2) global double, align 8
+; CHECK-NOT: @a7 =
+@a7 = external local_unnamed_addr addrspace(2) global i64, align 8
+
+define void @f(ptr %dst) {
+entry:
+  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[A1:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+  ; CHECK: store float [[A1]], ptr %dst
+  %a1 = load float, ptr addrspace(2) @a1, align 4
+  store float %a1, ptr %dst, align 8
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[A2:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4
+  ; CHECK: store i32 [[A2]], ptr [[PTR]]
+  %a2 = load i32, ptr addrspace(2) @a2, align 4
+  %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
+  store i32 %a2, ptr %a2.i, align 4
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[A3:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8
+  ; CHECK: store i32 [[A3]], ptr [[PTR]]
+  %a3 = load i32, ptr addrspace(2) @a3, align 4, !range !1, !noundef !2
+  %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 8
+  store i32 %a3, ptr %a3.i, align 8
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[A4:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 6
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 12
+  ; CHECK: store half [[A4]], ptr [[PTR]]
+  %a4 = load half, ptr addrspace(2) @a4, align 2
+  %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 12
+  store half %a4, ptr %a4.i, align 4
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[A5:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 7
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 14
+  ; CHECK: store i16 [[A5]], ptr [[PTR]]
+  %a5 = load i16, ptr addrspace(2) @a5, align 2
+  %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 14
+  store i16 %a5, ptr %a5.i, align 2
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[A6:%.*]] = extractvalue { double, double } [[LOAD]], 0
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16
+  ; CHECK: store double [[A6]], ptr [[PTR]]
+  %a6 = load double, ptr addrspace(2) @a6, align 8
+  %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 16
+  store double %a6, ptr %a6.i, align 8
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[A7:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 24
+  ; CHECK: store i64 [[A7]], ptr [[PTR]]
+  %a7 = load i64, ptr addrspace(2) @a7, align 8
+  %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 24
+  store i64 %a7, ptr %a7.i, align 8
+
+  ret void
+}
+
+!hlsl.cbs = !{!0}
+
+!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7}
+!1 = !{i32 0, i32 2}
+!2 = !{}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
new file mode 100644
index 0000000000000..2e599b971a6ce
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
@@ -0,0 +1,116 @@
+; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
+
+; cbuffer CB {
+;   float3 a1;     // offset   0, size 12 (+4)
+;   double3 a2;    // offset  16, size 24
+;   float16_t2 a3; // offset  40, size  4 (+4)
+;   uint64_t3 a4;  // offset  48, size 24 (+8)
+;   int4 a5;       // offset  80, size 16
+;   uint16_t3 a6;  // offset  96, size  6 (+10)
+; };
+%__cblayout_CB = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16> }>
+%struct.S = type { <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16> }
+
+@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 136, 0, 16, 40, 48, 80, 96)) poison
+@a1 = external local_unnamed_addr addrspace(2) global <3 x float>, align 16
+@a2 = external local_unnamed_addr addrspace(2) global <3 x double>, align 32
+@a3 = external local_unnamed_addr addrspace(2) global <2 x half>, align 4
+@a4 = external local_unnamed_addr addrspace(2) global <3 x i64>, align 32
+@a5 = external local_unnamed_addr addrspace(2) global <4 x i32>, align 16
+@a6 = external local_unnamed_addr addrspace(2) global <3 x i16>, align 8
+
+define void @f(ptr %dst) {
+entry:
+  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 136, 0, 16, 40, 48, 80, 96)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 136, 0, 16, 40, 48, 80, 96)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
+  ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+  ; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 1
+  ; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 2
+  ; CHECK: [[VEC0:%.*]] = insertelement <3 x float> poison, float [[X]], i32 0
+  ; CHECK: [[VEC1:%.*]] = insertelement <3 x float> [[VEC0]], float [[Y]], i32 1
+  ; CHECK: [[VEC2:%.*]] = insertelement <3 x float> [[VEC1]], float [[Z]], i32 2
+  ; CHECK: store <3 x float> [[VEC2]], ptr %dst
+  %a1 = load <3 x float>, ptr addrspace(2) @a1, align 16
+  store <3 x float> %a1, ptr %dst, align 4
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
+  ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
+  ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
+  ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
+  ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
+  ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1
+  ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16
+  ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]]
+  %a2 = load <3 x double>, ptr addrspace(2) @a2, align 32
+  %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 16
+  store <3 x double> %a2, ptr %a2.i, align 8
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
+  ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 4
+  ; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 5
+  ; CHECK: [[VEC0:%.*]] = insertelement <2 x half> poison, half [[X]], i32 0
+  ; CHECK: [[VEC1:%.*]] = insertelement <2 x half> [[VEC0]], half [[Y]], i32 1
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40
+  ; CHECK: store <2 x half> [[VEC1]], ptr [[PTR]]
+  %a3 = load <2 x half>, ptr addrspace(2) @a3, align 4
+  %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 40
+  store <2 x half> %a3, ptr %a3.i, align 2
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3)
+  ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+  ; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1
+  ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4)
+  ; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+  ; CHECK: [[VEC0:%.*]] = insertelement <3 x i64> poison, i64 [[X]], i32 0
+  ; CHECK: [[VEC1:%.*]] = insertelement <3 x i64> [[VEC0]], i64 [[Y]], i32 1
+  ; CHECK: [[VEC2:%.*]] = insertelement <3 x i64> [[VEC1]], i64 [[Z]], i32 2
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48
+  ; CHECK: store <3 x i64> [[VEC2]], ptr [[PTR]]
+  %a4 = load <3 x i64>, ptr addrspace(2) @a4, align 32
+  %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 48
+  store <3 x i64> %a4, ptr %a4.i, align 8
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
+  ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+  ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+  ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
+  ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
+  ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+  ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1
+  ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2
+  ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72
+  ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]]
+  %a5 = load <4 x i32>, ptr addrspace(2) @a5, align 16
+  %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 72
+  store <4 x i32> %a5, ptr %a5.i, align 4
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
+  ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
+  ; CHECK: [[Y:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 1
+  ; CHECK: [[Z:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 2
+  ; CHECK: [[VEC0:%.*]] = insertelement <3 x i16> poison, i16 [[X]], i32 0
+  ; CHECK: [[VEC1:%.*]] = insertelement <3 x i16> [[VEC0]], i16 [[Y]], i32 1
+  ; CHECK: [[VEC2:%.*]] = insertelement <3 x i16> [[VEC1]], i16 [[Z]], i32 2
+  ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 88
+  ; CHECK: store <3 x i16> [[VEC2]], ptr [[PTR]]
+  %a6 = load <3 x i16>, ptr addrspace(2) @a6, align 8
+  %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 88
+  store <3 x i16> %a6, ptr %a6.i, align 2
+
+  ret void
+}
+
+!hlsl.cbs = !{!0}
+
+!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6}
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index ee70cec534bc5..b1bd9f16f4efa 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -15,6 +15,7 @@
 ; CHECK-NEXT: ModulePass Manager
 ; CHECK-NEXT:   DXIL Finalize Linkage
 ; CHECK-NEXT:   DXIL Intrinsic Expansion
+; CHECK-NEXT:   DXIL CBuffer Access
 ; CHECK-NEXT:   DXIL Data Scalarization
 ; CHECK-NEXT:   DXIL Array Flattener
 ; CHECK-NEXT:   FunctionPass Manager

>From 15440ed5c63094b379db9ca35c2a2c20849de544 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Tue, 8 Apr 2025 09:08:56 +0900
Subject: [PATCH 2/3] clang-format

---
 llvm/lib/Frontend/HLSL/CBuffer.cpp            | 1 -
 llvm/lib/Target/DirectX/DXILCBufferAccess.cpp | 6 ++----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Frontend/HLSL/CBuffer.cpp b/llvm/lib/Frontend/HLSL/CBuffer.cpp
index b311f6aea9636..db1003ad3da71 100644
--- a/llvm/lib/Frontend/HLSL/CBuffer.cpp
+++ b/llvm/lib/Frontend/HLSL/CBuffer.cpp
@@ -57,7 +57,6 @@ std::optional<CBufferMetadata> CBufferMetadata::get(Module &M) {
   return Result;
 }
 
-
 void CBufferMetadata::eraseFromModule() {
   // Remove the cbs named metadata
   MD->eraseFromParent();
diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
index f8771efeac991..3867502e780b8 100644
--- a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
@@ -50,7 +50,7 @@ struct CBufferRowIntrin {
       break;
     default:
       llvm_unreachable("Only 16, 32, and 64 bit types supported");
-  }
+    }
   }
 };
 } // namespace
@@ -190,9 +190,7 @@ PreservedAnalyses DXILCBufferAccess::run(Module &M, ModuleAnalysisManager &AM) {
 namespace {
 class DXILCBufferAccessLegacy : public ModulePass {
 public:
-  bool runOnModule(Module &M) override {
-    return replaceCBufferAccesses(M);
-  }
+  bool runOnModule(Module &M) override { return replaceCBufferAccesses(M); }
   StringRef getPassName() const override { return "DXIL CBuffer Access"; }
   DXILCBufferAccessLegacy() : ModulePass(ID) {}
 

>From 0290631275b7aa9a04e78703498d6e29d3a23890 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail at justinbogner.com>
Date: Wed, 9 Apr 2025 14:40:33 +0900
Subject: [PATCH 3/3] Review feedback

---
 llvm/include/llvm/Frontend/HLSL/CBuffer.h     |  8 ++---
 llvm/lib/Frontend/HLSL/CBuffer.cpp            |  3 +-
 llvm/lib/Target/DirectX/DXILCBufferAccess.cpp | 15 +++++----
 .../DirectX/CBufferAccess/array-typedgep.ll   | 32 +++++++++++++++++++
 .../CodeGen/DirectX/CBufferAccess/arrays.ll   |  5 ++-
 .../CodeGen/DirectX/CBufferAccess/float.ll    |  5 ++-
 .../DirectX/CBufferAccess/gep-ce-two-uses.ll  |  6 +++-
 .../CodeGen/DirectX/CBufferAccess/scalars.ll  | 13 +++-----
 .../CodeGen/DirectX/CBufferAccess/vectors.ll  | 11 ++++---
 9 files changed, 71 insertions(+), 27 deletions(-)
 create mode 100644 llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll

diff --git a/llvm/include/llvm/Frontend/HLSL/CBuffer.h b/llvm/include/llvm/Frontend/HLSL/CBuffer.h
index cf45b5ff5a8e2..694a7fa854576 100644
--- a/llvm/include/llvm/Frontend/HLSL/CBuffer.h
+++ b/llvm/include/llvm/Frontend/HLSL/CBuffer.h
@@ -26,17 +26,17 @@ class NamedMDNode;
 namespace hlsl {
 
 struct CBufferMember {
-  CBufferMember(GlobalVariable *GV, size_t Offset) : GV(GV), Offset(Offset) {}
-
   GlobalVariable *GV;
   size_t Offset;
+
+  CBufferMember(GlobalVariable *GV, size_t Offset) : GV(GV), Offset(Offset) {}
 };
 
 struct CBufferMapping {
-  CBufferMapping(GlobalVariable *Handle) : Handle(Handle) {}
-
   GlobalVariable *Handle;
   SmallVector<CBufferMember> Members;
+
+  CBufferMapping(GlobalVariable *Handle) : Handle(Handle) {}
 };
 
 class CBufferMetadata {
diff --git a/llvm/lib/Frontend/HLSL/CBuffer.cpp b/llvm/lib/Frontend/HLSL/CBuffer.cpp
index db1003ad3da71..37c0d912e09ee 100644
--- a/llvm/lib/Frontend/HLSL/CBuffer.cpp
+++ b/llvm/lib/Frontend/HLSL/CBuffer.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Frontend/HLSL/CBuffer.h"
+#include "llvm/Frontend/HLSL/HLSLResource.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
@@ -65,6 +66,6 @@ void CBufferMetadata::eraseFromModule() {
 APInt hlsl::translateCBufArrayOffset(const DataLayout &DL, APInt Offset,
                                      ArrayType *Ty) {
   int64_t TypeSize = DL.getTypeSizeInBits(Ty->getElementType()) / 8;
-  int64_t RoundUp = alignTo(TypeSize, Align(16));
+  int64_t RoundUp = alignTo(TypeSize, Align(CBufferRowSizeInBytes));
   return Offset.udiv(TypeSize) * RoundUp;
 }
diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
index 3867502e780b8..7559f61b4cfb9 100644
--- a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
@@ -9,6 +9,7 @@
 #include "DXILCBufferAccess.h"
 #include "DirectX.h"
 #include "llvm/Frontend/HLSL/CBuffer.h"
+#include "llvm/Frontend/HLSL/HLSLResource.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicsDirectX.h"
 #include "llvm/InitializePasses.h"
@@ -93,8 +94,9 @@ static void replaceAccess(LoadInst *LI, GlobalVariable *Global,
   Type *Ty = LI->getType();
   CBufferRowIntrin Intrin(DL, Ty->getScalarType());
   // The cbuffer consists of some number of 16-byte rows.
-  unsigned int CurrentRow = Offset / 16;
-  unsigned int CurrentIndex = (Offset % 16) / Intrin.EltSize;
+  unsigned int CurrentRow = Offset / hlsl::CBufferRowSizeInBytes;
+  unsigned int CurrentIndex =
+      (Offset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
 
   auto *CBufLoad = Builder.CreateIntrinsic(
       Intrin.RetTy, Intrin.IID,
@@ -111,10 +113,11 @@ static void replaceAccess(LoadInst *LI, GlobalVariable *Global,
     Result = Elt;
 
     // However, if we loaded a <1 x T>, then we need to adjust the type here.
-    if (auto *VT = dyn_cast<FixedVectorType>(LI->getType()))
-      if (VT->getNumElements() == 1)
-        Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
-                                             Builder.getInt32(0));
+    if (auto *VT = dyn_cast<FixedVectorType>(LI->getType())) {
+      assert(VT->getNumElements() == 1 && "Can't have multiple elements here");
+      Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
+                                           Builder.getInt32(0));
+    }
   } else {
     // Walk each element and extract it, wrapping to new rows as needed.
     SmallVector<Value *> Extracts{Elt};
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll
new file mode 100644
index 0000000000000..dbd01b323aa2a
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
+
+; cbuffer CB : register(b0) {
+;   float a1[3];
+; }
+%__cblayout_CB = type <{ [3 x float] }>
+
+@CB.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison
+; CHECK: @CB.cb =
+; CHECK-NOT: external {{.*}} addrspace(2) global
+@a1 = external addrspace(2) global [3 x float], align 4
+
+; CHECK: define void @f
+define void @f(ptr %dst) {
+entry:
+  %CB.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_36_0tt(i32 0, i32 0, i32 1, i32 0, i1 false)
+  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) %CB.cb_h, ptr @CB.cb, align 4
+
+  ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
+  ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
+  ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+  ; CHECK: store float [[X]], ptr %dst
+  %a1 = load float, ptr addrspace(2) getelementptr inbounds ([3 x float], ptr addrspace(2) @a1, i32 0, i32 1), align 4
+  store float %a1, ptr %dst, align 32
+
+  ret void
+}
+
+; CHECK-NOT: !hlsl.cbs =
+!hlsl.cbs = !{!0}
+
+!0 = !{ptr @CB.cb, ptr addrspace(2) @a1}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
index 7478cc5f362dc..42d7943953b84 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll
@@ -11,9 +11,10 @@
 ;   bool a8[4];
 ; }
 %__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }>
-%struct.S = type { float, <3 x double>, half, i64, <4 x i32>, i16, i64, i32, [12 x i8] }
 
 @CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) poison
+; CHECK: @CB.cb =
+; CHECK-NOT: external {{.*}} addrspace(2) global
 @a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
 @a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32
 @a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2
@@ -23,6 +24,7 @@
 @a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8
 @a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4
 
+; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
   %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
@@ -114,6 +116,7 @@ entry:
   ret void
 }
 
+; CHECK-NOT: !hlsl.cbs =
 !hlsl.cbs = !{!0}
 
 !0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
index 8abbb4a831f1e..d7272b449166d 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll
@@ -1,11 +1,13 @@
 ; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
 
 %__cblayout_CB = type <{ float }>
-%struct.S = type { float }
 
 @CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 4, 0)) poison
+; CHECK: @CB.cb =
+; CHECK-NOT: external {{.*}} addrspace(2) global
 @x = external local_unnamed_addr addrspace(2) global float, align 4
 
+; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
   ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
@@ -17,6 +19,7 @@ entry:
   ret void
 }
 
+; CHECK-NOT: !hlsl.cbs =
 !hlsl.cbs = !{!0}
 
 !0 = !{ptr @CB.cb, ptr addrspace(2) @x}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
index e9bcc7bbad067..abe087dbe6100 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll
@@ -5,9 +5,12 @@
 ; }
 %__cblayout_CB = type <{ [3 x float] }>
 
-@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) poison
+@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison
+; CHECK: @CB.cb =
+; CHECK-NOT: external {{.*}} addrspace(2) global
 @a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
 
+; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
   ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
@@ -27,6 +30,7 @@ entry:
   ret void
 }
 
+; CHECK-NOT: !hlsl.cbs =
 !hlsl.cbs = !{!0}
 
 !0 = !{ptr @CB.cb, ptr addrspace(2) @a1}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
index 8305c1e177995..125d6b66c0107 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll
@@ -10,25 +10,19 @@
 ;   int64_t a7;   // offset 24, size  8
 ; }
 %__cblayout_CB = type <{ float, i32, i32, half, i16, double, i64 }>
-%struct.Scalars = type { float, i32, i32, half, i16, double, i64 }
 
-; CHECK: @CB.cb =
 @CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) poison
-; CHECK-NOT: @a1 =
+; CHECK: @CB.cb =
+; CHECK-NOT: external {{.*}} addrspace(2) global
 @a1 = external local_unnamed_addr addrspace(2) global float, align 4
-; CHECK-NOT: @a2 =
 @a2 = external local_unnamed_addr addrspace(2) global i32, align 4
-; CHECK-NOT: @a3 =
 @a3 = external local_unnamed_addr addrspace(2) global i32, align 4
-; CHECK-NOT: @a4 =
 @a4 = external local_unnamed_addr addrspace(2) global half, align 2
-; CHECK-NOT: @a5 =
 @a5 = external local_unnamed_addr addrspace(2) global i16, align 2
-; CHECK-NOT: @a6 =
 @a6 = external local_unnamed_addr addrspace(2) global double, align 8
-; CHECK-NOT: @a7 =
 @a7 = external local_unnamed_addr addrspace(2) global i64, align 8
 
+; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
   %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
@@ -98,6 +92,7 @@ entry:
   ret void
 }
 
+; CHECK-NOT: !hlsl.cbs =
 !hlsl.cbs = !{!0}
 
 !0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
index 2e599b971a6ce..6addf7482ac37 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll
@@ -9,9 +9,10 @@
 ;   uint16_t3 a6;  // offset  96, size  6 (+10)
 ; };
 %__cblayout_CB = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16> }>
-%struct.S = type { <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16> }
 
-@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 136, 0, 16, 40, 48, 80, 96)) poison
+@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) poison
+; CHECK: @CB.cb =
+; CHECK-NOT: external {{.*}} addrspace(2) global
 @a1 = external local_unnamed_addr addrspace(2) global <3 x float>, align 16
 @a2 = external local_unnamed_addr addrspace(2) global <3 x double>, align 32
 @a3 = external local_unnamed_addr addrspace(2) global <2 x half>, align 4
@@ -19,10 +20,11 @@
 @a5 = external local_unnamed_addr addrspace(2) global <4 x i32>, align 16
 @a6 = external local_unnamed_addr addrspace(2) global <3 x i16>, align 8
 
+; CHECK: define void @f
 define void @f(ptr %dst) {
 entry:
-  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 136, 0, 16, 40, 48, 80, 96)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
-  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 136, 0, 16, 40, 48, 80, 96)) %CB.cb_h.i.i, ptr @CB.cb, align 4
+  %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+  store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) %CB.cb_h.i.i, ptr @CB.cb, align 4
 
   ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb
   ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
@@ -111,6 +113,7 @@ entry:
   ret void
 }
 
+; CHECK-NOT: !hlsl.cbs =
 !hlsl.cbs = !{!0}
 
 !0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6}



More information about the llvm-commits mailing list