[llvm] [DirectX] Support ConstExpr GEPs (PR #150082)

Farzon Lotfi via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 22 11:32:29 PDT 2025


https://github.com/farzonl created https://github.com/llvm/llvm-project/pull/150082

- Fixes #150050
- Address the issue of many nested geps
- Check for ConstantExpr GEP if we see it check if it needs a global replacement with a new type. Create the new constExpr Gep and set the pointer operand to it. Finally cleanup and remove the old nested geps.

>From ebfd7d0dc423c5784ff818aaf15a0387c93eb04a Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Tue, 15 Jul 2025 16:20:09 -0400
Subject: [PATCH] [DirectX] Support ConstExpr GEPs - Fixes #150050 - Address
 the issue of many nested geps - Check for ConstantExpr GEP if we see it check
 if it needs a global   replacement with a new type. Create the new constExpr
 Gep and set   the pointer operand to it.   Finally cleanup and remove the old
 nested geps.

---
 .../Target/DirectX/DXILDataScalarization.cpp  | 73 +++++++++++++++++
 .../bugfix_150050_data_scalarize_const_gep.ll | 80 +++++++++++++++++++
 2 files changed, 153 insertions(+)
 create mode 100644 llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll

diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index d9d9b36d0b739..5ec55a6718b9a 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -300,11 +300,84 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
   return replaceDynamicExtractElementInst(EEI);
 }
 
+static void buildConstExprGEPChain(GetElementPtrInst &GEPI, Value *CurrentPtr,
+                                   SmallVector<ConstantExpr *, 4> &GEPChain,
+                                   IRBuilder<> &Builder) {
+  // Process the rest of the chain in reverse order (skipping the innermost)
+  for (int I = GEPChain.size() - 2; I >= 0; I--) {
+    ConstantExpr *CE = GEPChain[I];
+    GetElementPtrInst *GEPInst =
+        cast<GetElementPtrInst>(CE->getAsInstruction());
+    GEPInst->insertBefore(GEPI.getIterator());
+    SmallVector<Value *, MaxVecSize> CurrIndices(GEPInst->indices());
+
+    // Create a new GEP instruction
+    Type *SourceTy = GEPInst->getSourceElementType();
+    CurrentPtr =
+        Builder.CreateGEP(SourceTy, CurrentPtr, CurrIndices, GEPInst->getName(),
+                          GEPInst->getNoWrapFlags());
+
+    // If this is the outermost GEP, update the main GEPI
+    if (I == 0) {
+      GEPI.setOperand(GEPI.getPointerOperandIndex(), CurrentPtr);
+    }
+
+    // Clean up the temporary instruction
+    GEPInst->eraseFromParent();
+  }
+}
+
 bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
   Value *PtrOperand = GEPI.getPointerOperand();
   Type *OrigGEPType = GEPI.getSourceElementType();
   Type *NewGEPType = OrigGEPType;
   bool NeedsTransform = false;
+  // Check if the pointer operand is a ConstantExpr GEP
+  if (auto *PtrOpGEPCE = dyn_cast<ConstantExpr>(PtrOperand);
+      PtrOpGEPCE && PtrOpGEPCE->getOpcode() == Instruction::GetElementPtr) {
+
+    // Collect all nested GEPs in the chain
+    SmallVector<ConstantExpr *, 4> GEPChain;
+    Value *BasePointer = PtrOpGEPCE->getOperand(0);
+    GEPChain.push_back(PtrOpGEPCE);
+
+    // Walk up the chain to find all nested GEPs and the base pointer
+    while (auto *NextGEP = dyn_cast<ConstantExpr>(BasePointer)) {
+      if (NextGEP->getOpcode() != Instruction::GetElementPtr)
+        break;
+
+      GEPChain.push_back(NextGEP);
+      BasePointer = NextGEP->getOperand(0);
+    }
+
+    // Check if the base pointer is a global that needs replacement
+    if (GlobalVariable *NewGlobal = lookupReplacementGlobal(BasePointer)) {
+      IRBuilder<> Builder(&GEPI);
+
+      // Create a new GEP for the innermost GEP (last in the chain)
+      ConstantExpr *InnerGEPCE = GEPChain.back();
+      GetElementPtrInst *InnerGEP =
+          cast<GetElementPtrInst>(InnerGEPCE->getAsInstruction());
+      InnerGEP->insertBefore(GEPI.getIterator());
+
+      SmallVector<Value *, MaxVecSize> Indices(InnerGEP->indices());
+      Type *NewGEPType = NewGlobal->getValueType();
+      Value *NewInnerGEP =
+          Builder.CreateGEP(NewGEPType, NewGlobal, Indices, InnerGEP->getName(),
+                            InnerGEP->getNoWrapFlags());
+
+      // If there's only one GEP in the chain, update the main GEPI directly
+      if (GEPChain.size() == 1)
+        GEPI.setOperand(GEPI.getPointerOperandIndex(), NewInnerGEP);
+      else
+        // For multiple GEPs, we need to create a chain of GEPs
+        buildConstExprGEPChain(GEPI, NewInnerGEP, GEPChain, Builder);
+
+      // Clean up the innermost GEP
+      InnerGEP->eraseFromParent();
+      return true;
+    }
+  }
 
   if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
     NewGEPType = NewGlobal->getValueType();
diff --git a/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll
new file mode 100644
index 0000000000000..156a8e7c5c386
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll
@@ -0,0 +1,80 @@
+; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.4-library %s | FileCheck %s --check-prefixes=SCHECK,CHECK
+; RUN: opt -S -passes='dxil-data-scalarization,function(scalarizer<load-store>),dxil-flatten-arrays' -mtriple=dxil-pc-shadermodel6.4-library %s | FileCheck %s --check-prefixes=FCHECK,CHECK
+
+ at aTile = hidden addrspace(3) global [10 x [10 x <4 x i32>]] zeroinitializer, align 16
+ at bTile = hidden addrspace(3) global [10 x [10 x i32]] zeroinitializer, align 16
+ at cTile = internal global [2 x [2 x <2 x i32>]] zeroinitializer, align 16
+ at dTile = internal global [2 x [2 x [2 x <2 x i32>]]] zeroinitializer, align 16
+
+define void @CSMain() {
+; CHECK-LABEL: define void @CSMain() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE:%.*]] = alloca [4 x i32], align 16
+;
+; SCHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1), i32 0, i32 2
+; SCHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(3) [[TMP0]], align 16
+; SCHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16
+;
+; FCHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE_I14:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 1
+; FCHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE_I25:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 2
+; FCHECK-NEXT:    [[AFRAGPACKED_I_SCALARIZE_I36:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 3
+; FCHECK-NEXT:    [[DOTI07:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 48), align 16
+; FCHECK-NEXT:    [[DOTI119:%.*]] = load i32, ptr addrspace(3) getelementptr ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 49), align 4
+; FCHECK-NEXT:    [[DOTI2211:%.*]] = load i32, ptr addrspace(3) getelementptr ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 50), align 8
+; FCHECK-NEXT:    [[DOTI3313:%.*]] = load i32, ptr addrspace(3) getelementptr ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 51), align 4
+; FCHECK-NEXT:    store i32 [[DOTI07]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16
+; FCHECK-NEXT:    store i32 [[DOTI119]], ptr [[AFRAGPACKED_I_SCALARIZE_I14]], align 4
+; FCHECK-NEXT:    store i32 [[DOTI2211]], ptr [[AFRAGPACKED_I_SCALARIZE_I25]], align 8
+; FCHECK-NEXT:    store i32 [[DOTI3313]], ptr [[AFRAGPACKED_I_SCALARIZE_I36]], align 4
+;
+; CHECK-NEXT:    ret void
+entry:
+  %aFragPacked.i = alloca <4 x i32>, align 16
+  %0 = load <4 x i32>, ptr addrspace(3) getelementptr inbounds ([10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x <4 x i32>]], ptr addrspace(3) @aTile, i32 0, i32 1), i32 0, i32 2), align 16
+  store <4 x i32> %0, ptr %aFragPacked.i, align 16
+  ret void
+}
+
+define void @Main() {
+; CHECK-LABEL: define void @Main() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[BFRAGPACKED_I:%.*]] = alloca i32, align 16
+;
+; SCHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 1
+; SCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(3) [[TMP0]], align 16
+; SCHECK-NEXT:    store i32 [[TMP1]], ptr [[BFRAGPACKED_I]], align 16
+;
+; FCHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16
+; FCHECK-NEXT:    store i32 [[TMP0]], ptr [[BFRAGPACKED_I]], align 16
+;
+; CHECK-NEXT:    ret void
+entry:
+  %bFragPacked.i = alloca i32, align 16
+  %0 = load i32, ptr addrspace(3) getelementptr inbounds ([10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 1), align 16
+  store i32 %0, ptr %bFragPacked.i, align 16
+  ret void
+}
+
+define void @global_nested_geps_3d() {
+; CHECK-LABEL: define void @global_nested_geps_3d() {
+; SCHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1
+; SCHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+;
+; FCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 0, i32 7), align 4
+;
+; CHECK-NEXT:    ret void
+  %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* @cTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4
+  ret void
+}
+
+define void @global_nested_geps_4d() {
+; CHECK-LABEL: define void @global_nested_geps_4d() {
+; SCHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x <2 x i32>]], ptr getelementptr inbounds ([2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1
+; SCHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+;
+; FCHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4
+;
+; CHECK-NEXT:    ret void
+  %1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* getelementptr inbounds ([2 x [2 x [2 x <2 x i32>]]], [2 x [2 x [2 x <2 x i32>]]]* @dTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4
+  ret void
+}



More information about the llvm-commits mailing list