[llvm] [DirectX] add GEP i8 legalization (PR #142475)

Mon Jun 2 13:06:38 PDT 2025

https://github.com/farzonl created https://github.com/llvm/llvm-project/pull/142475

fixes #140415

The i8 legalization code in DXILLegalizePass's `fixI8UseChain` needs to be updated to check for i8 GEPs.
It seems that i8 GEPs are being left behind after all the other i8 instructions have been removed, and this is causing problems during validation.

Since this cleans up a missed GEP, the approach is to assume that `getPointerOperand` points to an alloca. We then check whether this is an array alloca and do some byte-offset arithmetic to figure out the memory index to use. Finally, we emit the new GEP and clean up the old one.

Finally, `upcastI8AllocasAndUses` needed to be updated to account for loads from GEPs rather than only loads directly from the alloca.

>From 244b01e43f4c974c682c90d1315c59605da2b289 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Mon, 2 Jun 2025 15:40:22 -0400
Subject: [PATCH] [DirectX] add GEP i8 legalization The i8 legalization code in
 DXILLegalizePass's `fixI8UseChain` needs to be updated to check for i8 geps.
 It seems like there are i8 GEPs being left around after we remove all the
 other i8 instructions and this is causing problems during validation.

Since this cleans up a missed GEP, the approach is to assume that
getPointerOperand points to an alloca. We then check whether this is
an array alloca and do some byte-offset arithmetic to figure out the
memory index to use. Finally, we emit the new GEP and clean up the
old one.

Finally, upcastI8AllocasAndUses needed to be updated to account for
loads from GEPs rather than only loads directly from the alloca.
---
 llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 58 +++++++++++++++++---
 llvm/test/CodeGen/DirectX/legalize-i8.ll     | 54 ++++++++++++++++++
 2 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index 23883c936a20d..f6a80ebfc8435 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -95,6 +95,8 @@ static void fixI8UseChain(Instruction &I,
     Type *ElementType = NewOperands[0]->getType();
     if (auto *AI = dyn_cast<AllocaInst>(NewOperands[0]))
       ElementType = AI->getAllocatedType();
+    if (auto *GEP = dyn_cast<GetElementPtrInst>(NewOperands[0]))
+      ElementType = GEP->getSourceElementType();
     LoadInst *NewLoad = Builder.CreateLoad(ElementType, NewOperands[0]);
     ReplacedValues[Load] = NewLoad;
     ToRemove.push_back(Load);
@@ -164,6 +166,36 @@ static void fixI8UseChain(Instruction &I,
     if (AdjustedCast)
       Cast->replaceAllUsesWith(AdjustedCast);
   }
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+    if (!GEP->getType()->isPointerTy() ||
+        !GEP->getSourceElementType()->isIntegerTy(8))
+      return;
+
+    Value *BasePtr = GEP->getPointerOperand();
+    if (ReplacedValues.count(BasePtr))
+      BasePtr = ReplacedValues[BasePtr];
+
+    Type *ElementType = BasePtr->getType();
+    if (auto *AI = dyn_cast<AllocaInst>(BasePtr))
+      ElementType = AI->getAllocatedType();
+    if (auto *ArrTy = dyn_cast<ArrayType>(ElementType))
+      ElementType = ArrTy->getArrayElementType();
+
+    ConstantInt *Offset = dyn_cast<ConstantInt>(GEP->getOperand(1));
+    // Note: a non-constant i8 offset could only be converted to an i32 offset
+    // by emitting extra code. We should expect this value to be a ConstantInt
+    // since offsets are very regularly converted.
+    assert(Offset && "Offset is expected to be a ConstantInt");
+    uint32_t ByteOffset = Offset->getZExtValue();
+    uint32_t ElemSize = GEP->getDataLayout().getTypeAllocSize(ElementType);
+    assert(ElemSize > 0 && "ElementSize must be set");
+    uint32_t Index = ByteOffset / ElemSize;
+    Value *NewGEP =
+        Builder.CreateGEP(ElementType, BasePtr, Builder.getInt32(Index),
+                          GEP->getName(), GEP->getNoWrapFlags());
+    ReplacedValues[GEP] = NewGEP;
+    ToRemove.push_back(GEP);
+  }
 }
 
 static void upcastI8AllocasAndUses(Instruction &I,
@@ -175,15 +207,12 @@ static void upcastI8AllocasAndUses(Instruction &I,
 
   Type *SmallestType = nullptr;
 
-  for (User *U : AI->users()) {
-    auto *Load = dyn_cast<LoadInst>(U);
-    if (!Load)
-      continue;
+  auto ProcessLoad = [&](LoadInst *Load) {
     for (User *LU : Load->users()) {
       Type *Ty = nullptr;
-      if (auto *Cast = dyn_cast<CastInst>(LU))
+      if (auto *Cast = dyn_cast<CastInst>(LU)) {
         Ty = Cast->getType();
-      if (CallInst *CI = dyn_cast<CallInst>(LU)) {
+      } else if (auto *CI = dyn_cast<CallInst>(LU)) {
         if (CI->getIntrinsicID() == Intrinsic::memset)
           Ty = Type::getInt32Ty(CI->getContext());
       }
@@ -191,9 +220,22 @@ static void upcastI8AllocasAndUses(Instruction &I,
       if (!Ty)
         continue;
 
-      if (!SmallestType ||
-          Ty->getPrimitiveSizeInBits() < SmallestType->getPrimitiveSizeInBits())
+      if (!SmallestType || Ty->getPrimitiveSizeInBits() <
+                               SmallestType->getPrimitiveSizeInBits()) {
         SmallestType = Ty;
+      }
+    }
+  };
+
+  for (User *U : AI->users()) {
+    if (auto *Load = dyn_cast<LoadInst>(U))
+      ProcessLoad(Load);
+    else if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+      for (User *GU : GEP->users()) {
+        if (auto *Load = dyn_cast<LoadInst>(GU)) {
+          ProcessLoad(Load);
+        }
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/DirectX/legalize-i8.ll b/llvm/test/CodeGen/DirectX/legalize-i8.ll
index 2602be778cd86..d1d76ccf5c76c 100644
--- a/llvm/test/CodeGen/DirectX/legalize-i8.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-i8.ll
@@ -106,3 +106,57 @@ define i32 @all_imm() {
   %2 = sext i8 %1 to i32
   ret i32 %2
 }
+
+define i32 @scalar_i8_geps() {
+  ; CHECK-LABEL: define i32 @scalar_i8_geps(
+  ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4
+  ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[ALLOCA]], i32 0
+  ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+  ; CHECK-NEXT:    ret i32 [[LOAD]]
+    %1 = alloca i8, align 4
+    %2 = getelementptr inbounds nuw i8, ptr %1, i32 0
+    %3 = load i8, ptr %2
+    %4 = sext i8 %3 to i32
+    ret i32 %4
+}
+
+define i32 @i8_geps_index0() {
+  ; CHECK-LABEL: define i32 @i8_geps_index0(
+  ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x i32], align 8
+  ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[ALLOCA]], i32 0
+  ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+  ; CHECK-NEXT:    ret i32 [[LOAD]]
+  %1 = alloca [2 x i32], align 8
+  %2 = getelementptr inbounds nuw i8, ptr %1, i32 0
+  %3 = load i8, ptr %2
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @i8_geps_index1() {
+  ; CHECK-LABEL: define i32 @i8_geps_index1(
+  ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x i32], align 8
+  ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[ALLOCA]], i32 1
+  ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]]
+  ; CHECK-NEXT:    ret i32 [[LOAD]]
+  %1 = alloca [2 x i32], align 8
+  %2 = getelementptr inbounds nuw i8, ptr %1, i32 4
+  %3 = load i8, ptr %2
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @i8_gep_store() {
+  ; CHECK-LABEL: define i32 @i8_gep_store(
+  ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x i32], align 8
+  ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[ALLOCA]], i32 1
+  ; CHECK-NEXT:    store i32 1, ptr [[GEP]], align 4
+  ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]]
+  ; CHECK-NEXT:    ret i32 [[LOAD]]
+  %1 = alloca [2 x i32], align 8
+  %2 = getelementptr inbounds nuw i8, ptr %1, i32 4
+  store i8 1, ptr %2
+  %3 = load i8, ptr %2
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}