[llvm] 9bc4355 - [SLSR] Always generate i8 GEPs

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 10 02:31:39 PST 2024


Author: Nikita Popov
Date: 2024-01-10T11:31:27+01:00
New Revision: 9bc4355f091b530625ec6839a8c4858b6de4f1b4

URL: https://github.com/llvm/llvm-project/commit/9bc4355f091b530625ec6839a8c4858b6de4f1b4
DIFF: https://github.com/llvm/llvm-project/commit/9bc4355f091b530625ec6839a8c4858b6de4f1b4.diff

LOG: [SLSR] Always generate i8 GEPs

Always generate canonical i8 GEPs. Especially as this is a backend
pass, trying to generate a "nice" GEP representation is not useful.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
    llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
    llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/reassociate-geps-and-slsr.ll
    llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index ca1f3a0c0ae342..2cce6eb22341c2 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -233,13 +233,9 @@ class StraightLineStrengthReduce {
   void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,
                         GetElementPtrInst *GEP);
 
-  // Emit code that computes the "bump" from Basis to C. If the candidate is a
-  // GEP and the bump is not divisible by the element size of the GEP, this
-  // function sets the BumpWithUglyGEP flag to notify its caller to bump the
-  // basis using an ugly GEP.
+  // Emit code that computes the "bump" from Basis to C.
   static Value *emitBump(const Candidate &Basis, const Candidate &C,
-                         IRBuilder<> &Builder, const DataLayout *DL,
-                         bool &BumpWithUglyGEP);
+                         IRBuilder<> &Builder, const DataLayout *DL);
 
   const DataLayout *DL = nullptr;
   DominatorTree *DT = nullptr;
@@ -581,26 +577,11 @@ static void unifyBitWidth(APInt &A, APInt &B) {
 Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
                                             const Candidate &C,
                                             IRBuilder<> &Builder,
-                                            const DataLayout *DL,
-                                            bool &BumpWithUglyGEP) {
+                                            const DataLayout *DL) {
   APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue();
   unifyBitWidth(Idx, BasisIdx);
   APInt IndexOffset = Idx - BasisIdx;
 
-  BumpWithUglyGEP = false;
-  if (Basis.CandidateKind == Candidate::GEP) {
-    APInt ElementSize(
-        IndexOffset.getBitWidth(),
-        DL->getTypeAllocSize(
-            cast<GetElementPtrInst>(Basis.Ins)->getResultElementType()));
-    APInt Q, R;
-    APInt::sdivrem(IndexOffset, ElementSize, Q, R);
-    if (R == 0)
-      IndexOffset = Q;
-    else
-      BumpWithUglyGEP = true;
-  }
-
   // Compute Bump = C - Basis = (i' - i) * S.
   // Common case 1: if (i' - i) is 1, Bump = S.
   if (IndexOffset == 1)
@@ -645,8 +626,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
     return;
 
   IRBuilder<> Builder(C.Ins);
-  bool BumpWithUglyGEP;
-  Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP);
+  Value *Bump = emitBump(Basis, C, Builder, DL);
   Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
   switch (C.CandidateKind) {
   case Candidate::Add:
@@ -673,28 +653,13 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
     }
     break;
   }
-  case Candidate::GEP:
-    {
-    Type *OffsetTy = DL->getIndexType(C.Ins->getType());
+  case Candidate::GEP: {
     bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
-    if (BumpWithUglyGEP) {
-      // C = (char *)Basis + Bump
-      unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
-      Type *CharTy = PointerType::get(Basis.Ins->getContext(), AS);
-      Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
-      Reduced =
-          Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds);
-      Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
-    } else {
-      // C = gep Basis, Bump
-      // Canonicalize bump to pointer size.
-      Bump = Builder.CreateSExtOrTrunc(Bump, OffsetTy);
-      Reduced = Builder.CreateGEP(
-          cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), Basis.Ins,
-          Bump, "", InBounds);
-    }
-      break;
-    }
+    // C = (char *)Basis + Bump
+    Reduced =
+        Builder.CreateGEP(Builder.getInt8Ty(), Basis.Ins, Bump, "", InBounds);
+    break;
+  }
   default:
     llvm_unreachable("C.CandidateKind is invalid");
   };

diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll b/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
index 6792f807a7458e..9cf725840abdc3 100644
--- a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
+++ b/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
@@ -13,8 +13,9 @@ define amdgpu_kernel void @slsr_after_reassociate_global_geps_mubuf_max_offset(p
 ; CHECK-NEXT:    [[P12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP1]], i64 1023
 ; CHECK-NEXT:    [[V11:%.*]] = load i32, ptr addrspace(1) [[P12]], align 4
 ; CHECK-NEXT:    store i32 [[V11]], ptr addrspace(1) [[OUT]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr addrspace(1) [[TMP1]], i64 [[TMP0]]
-; CHECK-NEXT:    [[P24:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP2]], i64 1023
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP1]], i64 [[TMP2]]
+; CHECK-NEXT:    [[P24:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP3]], i64 1023
 ; CHECK-NEXT:    [[V22:%.*]] = load i32, ptr addrspace(1) [[P24]], align 4
 ; CHECK-NEXT:    store i32 [[V22]], ptr addrspace(1) [[OUT]], align 4
 ; CHECK-NEXT:    ret void
@@ -78,8 +79,9 @@ define amdgpu_kernel void @slsr_after_reassociate_lds_geps_ds_max_offset(ptr add
 ; CHECK-NEXT:    [[P12:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP0]], i32 16383
 ; CHECK-NEXT:    [[V11:%.*]] = load i32, ptr addrspace(3) [[P12]], align 4
 ; CHECK-NEXT:    store i32 [[V11]], ptr addrspace(1) [[OUT]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr addrspace(3) [[TMP0]], i32 [[I]]
-; CHECK-NEXT:    [[P24:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP1]], i32 16383
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[I]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP0]], i32 [[TMP1]]
+; CHECK-NEXT:    [[P24:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP2]], i32 16383
 ; CHECK-NEXT:    [[V22:%.*]] = load i32, ptr addrspace(3) [[P24]], align 4
 ; CHECK-NEXT:    store i32 [[V22]], ptr addrspace(1) [[OUT]], align 4
 ; CHECK-NEXT:    ret void

diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/reassociate-geps-and-slsr.ll b/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/reassociate-geps-and-slsr.ll
index de085ef10c54fb..e65b9b1a99a6d8 100644
--- a/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/reassociate-geps-and-slsr.ll
+++ b/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/reassociate-geps-and-slsr.ll
@@ -35,16 +35,17 @@ define void @slsr_after_reassociate_geps(ptr %arr, i32 %i) {
 ; CHECK-NEXT:    [[P12:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 5
 ; CHECK-NEXT:    [[V1:%.*]] = load float, ptr [[P12]], align 4
 ; CHECK-NEXT:    call void @foo(float [[V1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr float, ptr [[TMP2]], i64 [[TMP1]]
-; CHECK-NEXT:    [[P24:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 5
+; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP3]]
+; CHECK-NEXT:    [[P24:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 5
 ; CHECK-NEXT:    [[V2:%.*]] = load float, ptr [[P24]], align 4
 ; CHECK-NEXT:    call void @foo(float [[V2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr float, ptr [[TMP3]], i64 [[TMP1]]
-; CHECK-NEXT:    [[P36:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 5
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 [[TMP3]]
+; CHECK-NEXT:    [[P36:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 5
 ; CHECK-NEXT:    [[V3:%.*]] = load float, ptr [[P36]], align 4
 ; CHECK-NEXT:    call void @foo(float [[V3]])
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[TMP4]], i64 [[TMP1]]
-; CHECK-NEXT:    [[P48:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 5
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i64 [[TMP3]]
+; CHECK-NEXT:    [[P48:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 5
 ; CHECK-NEXT:    [[V4:%.*]] = load float, ptr [[P48]], align 4
 ; CHECK-NEXT:    call void @foo(float [[V4]])
 ; CHECK-NEXT:    ret void

diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll
index b446a273d9bd50..7cd45329c24fe2 100644
--- a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll
+++ b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll
@@ -19,7 +19,8 @@ define void @slsr_gep(ptr %input, i64 %s) {
 ; CHECK-NEXT:    call void @foo(ptr [[INPUT]])
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i32, ptr [[INPUT]], i64 [[S]]
 ; CHECK-NEXT:    call void @foo(ptr [[P1]])
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[S]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[S]], 2
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP1]]
 ; CHECK-NEXT:    call void @foo(ptr [[P2]])
 ; CHECK-NEXT:    ret void
 ;
@@ -55,7 +56,8 @@ define void @slsr_gep_sext(ptr %input, i32 %s) {
 ; CHECK-NEXT:    [[T:%.*]] = sext i32 [[S]] to i64
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i32, ptr [[INPUT]], i64 [[T]]
 ; CHECK-NEXT:    call void @foo(ptr [[P1]])
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[T]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[T]], 2
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP1]]
 ; CHECK-NEXT:    call void @foo(ptr [[P2]])
 ; CHECK-NEXT:    ret void
 ;
@@ -92,10 +94,10 @@ define void @slsr_gep_2d(ptr %input, i64 %s, i64 %t) {
 ; CHECK-SAME: ptr [[INPUT:%.*]], i64 [[S:%.*]], i64 [[T:%.*]]) {
 ; CHECK-NEXT:    [[P0:%.*]] = getelementptr inbounds [10 x [5 x i32]], ptr [[INPUT]], i64 0, i64 [[S]], i64 [[T]]
 ; CHECK-NEXT:    call void @foo(ptr [[P0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[S]], 5
-; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i32, ptr [[P0]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[S]], 20
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 [[TMP1]]
 ; CHECK-NEXT:    call void @foo(ptr [[P1]])
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP1]]
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP1]]
 ; CHECK-NEXT:    call void @foo(ptr [[P2]])
 ; CHECK-NEXT:    ret void
 ;
@@ -160,7 +162,8 @@ define void @slsr_out_of_bounds_gep(ptr %input, i32 %s) {
 ; CHECK-NEXT:    [[T:%.*]] = sext i32 [[S]] to i64
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[INPUT]], i64 [[T]]
 ; CHECK-NEXT:    call void @foo(ptr [[P1]])
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[T]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[T]], 2
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP1]]
 ; CHECK-NEXT:    call void @foo(ptr [[P2]])
 ; CHECK-NEXT:    ret void
 ;
@@ -238,7 +241,8 @@ define void @slsr_gep_fat_pointer(ptr addrspace(2) %input, i32 %s) {
 ; CHECK-SAME: ptr addrspace(2) [[INPUT:%.*]], i32 [[S:%.*]]) {
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[INPUT]], i32 [[S]]
 ; CHECK-NEXT:    call void @baz2(ptr addrspace(2) [[P1]])
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[P1]], i32 [[S]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[S]], 2
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr addrspace(2) [[P1]], i32 [[TMP1]]
 ; CHECK-NEXT:    call void @baz2(ptr addrspace(2) [[P2]])
 ; CHECK-NEXT:    ret void
 ;


        


More information about the llvm-commits mailing list