[llvm] 5411ebd - [DirectX] add GEP i8 legalization (#142475)

Wed Jun 4 12:31:19 PDT 2025

Author: Farzon Lotfi
Date: 2025-06-04T15:31:15-04:00
New Revision: 5411ebdebc445b32982b75a94d347b5fcdd8ece9

URL: https://github.com/llvm/llvm-project/commit/5411ebdebc445b32982b75a94d347b5fcdd8ece9
DIFF: https://github.com/llvm/llvm-project/commit/5411ebdebc445b32982b75a94d347b5fcdd8ece9.diff

LOG: [DirectX] add GEP i8 legalization (#142475)

fixes #140415

The i8 legalization code in DXILLegalizePass's `fixI8UseChain` needs to
be updated to check for i8 GEPs.
It seems that i8 GEPs are being left behind after we remove all the
other i8 instructions, and this is causing problems during validation.

Since this is cleaning up a missed GEP, the approach is to assume that
the GEP's pointer operand (`getPointerOperand`) is an alloca. We then
check whether it is an array alloca and do some byte-offset arithmetic
to figure out the memory index to use. Finally, we emit the new GEP and
clean up the old one.

Finally, `upcastI8AllocasAndUses` needed to be updated to account for
loads off of GEPs instead of just loads directly from the alloca.

Added: 
    

Modified: 
    llvm/lib/Target/DirectX/DXILLegalizePass.cpp
    llvm/test/CodeGen/DirectX/legalize-i8.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index 23883c936a20d..3e21f3c109456 100644

--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -87,20 +87,63 @@ static void fixI8UseChain(Instruction &I,
     return;
   }
 
-  if (auto *Load = dyn_cast<LoadInst>(&I)) {
-    if (!I.getType()->isIntegerTy(8))
-      return;
+  if (auto *Load = dyn_cast<LoadInst>(&I);
+      Load && I.getType()->isIntegerTy(8)) {
     SmallVector<Value *> NewOperands;
     ProcessOperands(NewOperands);
     Type *ElementType = NewOperands[0]->getType();
     if (auto *AI = dyn_cast<AllocaInst>(NewOperands[0]))
       ElementType = AI->getAllocatedType();
+    if (auto *GEP = dyn_cast<GetElementPtrInst>(NewOperands[0])) {
+      ElementType = GEP->getSourceElementType();
+      if (ElementType->isArrayTy())
+        ElementType = ElementType->getArrayElementType();
+    }
     LoadInst *NewLoad = Builder.CreateLoad(ElementType, NewOperands[0]);
     ReplacedValues[Load] = NewLoad;
     ToRemove.push_back(Load);
     return;
   }
 
+  if (auto *Load = dyn_cast<LoadInst>(&I);
+      Load && isa<ConstantExpr>(Load->getPointerOperand())) {
+    auto *CE = dyn_cast<ConstantExpr>(Load->getPointerOperand());
+    if (!(CE->getOpcode() == Instruction::GetElementPtr))
+      return;
+    auto *GEP = dyn_cast<GEPOperator>(CE);
+    if (!GEP->getSourceElementType()->isIntegerTy(8))
+      return;
+
+    Type *ElementType = Load->getType();
+    ConstantInt *Offset = dyn_cast<ConstantInt>(GEP->getOperand(1));
+    uint32_t ByteOffset = Offset->getZExtValue();
+    uint32_t ElemSize = Load->getDataLayout().getTypeAllocSize(ElementType);
+    uint32_t Index = ByteOffset / ElemSize;
+
+    Value *PtrOperand = GEP->getPointerOperand();
+    Type *GEPType = GEP->getPointerOperandType();
+
+    if (auto *GV = dyn_cast<GlobalVariable>(PtrOperand))
+      GEPType = GV->getValueType();
+    if (auto *AI = dyn_cast<AllocaInst>(PtrOperand))
+      GEPType = AI->getAllocatedType();
+
+    if (auto *ArrTy = dyn_cast<ArrayType>(GEPType))
+      GEPType = ArrTy;
+    else
+      GEPType = ArrayType::get(ElementType, 1); // its a scalar
+
+    Value *NewGEP = Builder.CreateGEP(
+        GEPType, PtrOperand, {Builder.getInt32(0), Builder.getInt32(Index)},
+        GEP->getName(), GEP->getNoWrapFlags());
+
+    LoadInst *NewLoad = Builder.CreateLoad(ElementType, NewGEP);
+    ReplacedValues[Load] = NewLoad;
+    Load->replaceAllUsesWith(NewLoad);
+    ToRemove.push_back(Load);
+    return;
+  }
+
   if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
     if (!I.getType()->isIntegerTy(8))
       return;
@@ -155,6 +198,7 @@ static void fixI8UseChain(Instruction &I,
       Cast->replaceAllUsesWith(Replacement);
       return;
     }
+
     Value *AdjustedCast = nullptr;
     if (Cast->getOpcode() == Instruction::ZExt)
       AdjustedCast = Builder.CreateZExtOrTrunc(Replacement, Cast->getType());
@@ -164,6 +208,45 @@ static void fixI8UseChain(Instruction &I,
     if (AdjustedCast)
       Cast->replaceAllUsesWith(AdjustedCast);
   }
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+    if (!GEP->getType()->isPointerTy() ||
+        !GEP->getSourceElementType()->isIntegerTy(8))
+      return;
+
+    Value *BasePtr = GEP->getPointerOperand();
+    if (ReplacedValues.count(BasePtr))
+      BasePtr = ReplacedValues[BasePtr];
+
+    Type *ElementType = BasePtr->getType();
+
+    if (auto *AI = dyn_cast<AllocaInst>(BasePtr))
+      ElementType = AI->getAllocatedType();
+    if (auto *GV = dyn_cast<GlobalVariable>(BasePtr))
+      ElementType = GV->getValueType();
+
+    Type *GEPType = ElementType;
+    if (auto *ArrTy = dyn_cast<ArrayType>(ElementType))
+      ElementType = ArrTy->getArrayElementType();
+    else
+      GEPType = ArrayType::get(ElementType, 1); // its a scalar
+
+    ConstantInt *Offset = dyn_cast<ConstantInt>(GEP->getOperand(1));
+    // Note: i8 to i32 offset conversion without emitting IR requires constant
+    // ints. Since offset conversion is common, we can safely assume Offset is
+    // always a ConstantInt, so no need to have a conditional bail out on
+    // nullptr, instead assert this is the case.
+    assert(Offset && "Offset is expected to be a ConstantInt");
+    uint32_t ByteOffset = Offset->getZExtValue();
+    uint32_t ElemSize = GEP->getDataLayout().getTypeAllocSize(ElementType);
+    assert(ElemSize > 0 && "ElementSize must be set");
+    uint32_t Index = ByteOffset / ElemSize;
+    Value *NewGEP = Builder.CreateGEP(
+        GEPType, BasePtr, {Builder.getInt32(0), Builder.getInt32(Index)},
+        GEP->getName(), GEP->getNoWrapFlags());
+    ReplacedValues[GEP] = NewGEP;
+    GEP->replaceAllUsesWith(NewGEP);
+    ToRemove.push_back(GEP);
+  }
 }
 
 static void upcastI8AllocasAndUses(Instruction &I,
@@ -175,15 +258,12 @@ static void upcastI8AllocasAndUses(Instruction &I,
 
   Type *SmallestType = nullptr;
 
-  for (User *U : AI->users()) {
-    auto *Load = dyn_cast<LoadInst>(U);
-    if (!Load)
-      continue;
+  auto ProcessLoad = [&](LoadInst *Load) {
     for (User *LU : Load->users()) {
       Type *Ty = nullptr;
-      if (auto *Cast = dyn_cast<CastInst>(LU))
+      if (CastInst *Cast = dyn_cast<CastInst>(LU))
         Ty = Cast->getType();
-      if (CallInst *CI = dyn_cast<CallInst>(LU)) {
+      else if (CallInst *CI = dyn_cast<CallInst>(LU)) {
         if (CI->getIntrinsicID() == Intrinsic::memset)
           Ty = Type::getInt32Ty(CI->getContext());
       }
@@ -195,6 +275,17 @@ static void upcastI8AllocasAndUses(Instruction &I,
           Ty->getPrimitiveSizeInBits() < SmallestType->getPrimitiveSizeInBits())
         SmallestType = Ty;
     }
+  };
+
+  for (User *U : AI->users()) {
+    if (auto *Load = dyn_cast<LoadInst>(U))
+      ProcessLoad(Load);
+    else if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+      for (User *GU : GEP->users()) {
+        if (auto *Load = dyn_cast<LoadInst>(GU))
+          ProcessLoad(Load);
+      }
+    }
   }
 
   if (!SmallestType)

diff  --git a/llvm/test/CodeGen/DirectX/legalize-i8.ll b/llvm/test/CodeGen/DirectX/legalize-i8.ll
index 2602be778cd86..f8aa2c5ecd932 100644
--- a/llvm/test/CodeGen/DirectX/legalize-i8.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-i8.ll
@@ -106,3 +106,75 @@ define i32 @all_imm() {
   %2 = sext i8 %1 to i32
   ret i32 %2
 }
+
+define i32 @scalar_i8_geps() {
+  ; CHECK-LABEL: define i32 @scalar_i8_geps(
+  ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4
+  ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw [1 x i32], ptr [[ALLOCA]], i32 0, i32 0
+  ; CHECK:         [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+  ; CHECK-NEXT:    ret i32 [[LOAD]]
+    %1 = alloca i8, align 4
+    %2 = getelementptr inbounds nuw i8, ptr %1, i32 0
+    %3 = load i8, ptr %2
+    %4 = sext i8 %3 to i32
+    ret i32 %4
+}
+
+define i32 @i8_geps_index0() {
+  ; CHECK-LABEL: define i32 @i8_geps_index0(
+  ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x i32], align 8
+  ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[ALLOCA]], i32 0, i32 0
+  ; CHECK:         [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+  ; CHECK-NEXT:    ret i32 [[LOAD]]
+  %1 = alloca [2 x i32], align 8
+  %2 = getelementptr inbounds nuw i8, ptr %1, i32 0
+  %3 = load i8, ptr %2
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @i8_geps_index1() {
+  ; CHECK-LABEL: define i32 @i8_geps_index1(
+  ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x i32], align 8
+  ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[ALLOCA]], i32 0, i32 1
+  ; CHECK:         [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+  ; CHECK-NEXT:    ret i32 [[LOAD]]
+  %1 = alloca [2 x i32], align 8
+  %2 = getelementptr inbounds nuw i8, ptr %1, i32 4
+  %3 = load i8, ptr %2
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @i8_gep_store() {
+  ; CHECK-LABEL: define i32 @i8_gep_store(
+  ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x i32], align 8
+  ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[ALLOCA]], i32 0, i32 1
+  ; CHECK-NEXT:    store i32 1, ptr [[GEP]], align 4
+  ; CHECK:         [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+  ; CHECK-NEXT:    ret i32 [[LOAD]]
+  %1 = alloca [2 x i32], align 8
+  %2 = getelementptr inbounds nuw i8, ptr %1, i32 4
+  store i8 1, ptr %2
+  %3 = load i8, ptr %2
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}
+
+@g = local_unnamed_addr addrspace(3) global [2 x float] zeroinitializer, align 4
+define float @i8_gep_global_index() {
+  ; CHECK-LABEL: define float @i8_gep_global_index(
+  ; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr addrspace(3) getelementptr inbounds nuw ([2 x float], ptr addrspace(3) @g, i32 0, i32 1), align 4
+  ; CHECK-NEXT:    ret float [[LOAD]]
+  %1 = getelementptr inbounds nuw i8, ptr addrspace(3) @g, i32 4
+  %2 = load float, ptr addrspace(3) %1, align 4
+  ret float %2
+}
+
+define float @i8_gep_global_constexpr() {
+  ; CHECK-LABEL: define float @i8_gep_global_constexpr(
+  ; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr addrspace(3) getelementptr inbounds nuw ([2 x float], ptr addrspace(3) @g, i32 0, i32 1), align 4
+  ; CHECK-NEXT: ret float [[LOAD]]
+  %1 = load float, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g, i32 4), align 4
+  ret float %1
+}