[llvm] 39a0677 - [AggressiveInstCombine] fold loads from constant global patterned arrays and structs by GEP indices
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 12 03:02:41 PDT 2023
Author: khei4
Date: 2023-05-12T19:02:28+09:00
New Revision: 39a0677784d1b53f2d6e33af2a53e915f3f62c86
URL: https://github.com/llvm/llvm-project/commit/39a0677784d1b53f2d6e33af2a53e915f3f62c86
DIFF: https://github.com/llvm/llvm-project/commit/39a0677784d1b53f2d6e33af2a53e915f3f62c86.diff
LOG: [AggressiveInstCombine] fold loads from constant global patterned arrays and structs by GEP indices
Differential Revision: https://reviews.llvm.org/D146622
Fixes https://github.com/llvm/llvm-project/issues/61615
Reviewed By: nikic
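For illustration, a minimal sketch of the kind of fold this change enables (adapted from the patterned-load tests below; @arr and the function name are not from the commit): a load through a variable GEP index into a constant patterned global yields the same value at every valid offset, so it can be replaced by that constant.

@arr = internal constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 2

define i8 @load_every_other_byte(i64 %idx) {
  ; The i16-typed GEP makes every reachable offset even, and every even
  ; offset within @arr holds the byte 1, so this folds to: ret i8 1
  %p = getelementptr inbounds i16, ptr @arr, i64 %idx
  %v = load i8, ptr %p, align 2
  ret i8 %v
}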
Added:
Modified:
llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 3c53c7adb29c4..1fd1567caec24 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -821,6 +821,48 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
return true;
}
+// Calculate the GEP stride and the accumulated constant offset, and return
+// them as a {Stride, ModOffset} pair.
+static std::pair<APInt, APInt>
+getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) {
+ unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
+ std::optional<APInt> Stride;
+ APInt ModOffset(BW, 0);
+ // Return the minimum GEP stride, the greatest common divisor of consecutive
+ // GEP index scales (cf. Bézout's identity).
+ while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
+ MapVector<Value *, APInt> VarOffsets;
+ if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset))
+ break;
+
+ for (auto [V, Scale] : VarOffsets) {
+ // A non-inbounds GEP may wrap, so the offset is only known modulo a
+ // power of two; keep only the largest power-of-two factor of the scale.
+ if (!GEP->isInBounds())
+ Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
+
+ if (!Stride)
+ Stride = Scale;
+ else
+ Stride = APIntOps::GreatestCommonDivisor(*Stride, Scale);
+ }
+
+ PtrOp = GEP->getPointerOperand();
+ }
+
+ // Check whether the pointer chains back to a GlobalVariable through at
+ // least one GEP. Even when it does not, the caller can still fall back
+ // to the alignment-based check.
+ if (!isa<GlobalVariable>(PtrOp) || !Stride)
+ return {APInt(BW, 1), APInt(BW, 0)};
+
+ // GEP indices may be signed, so normalize the constant offset to the
+ // non-negative remainder of division by the minimum GEP stride.
+ ModOffset = ModOffset.srem(*Stride);
+ if (ModOffset.isNegative())
+ ModOffset += *Stride;
+
+ return {*Stride, ModOffset};
+}
+
/// If C is a constant patterned array and all valid loaded results for the
/// given alignment are the same constant, return that constant.
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
@@ -835,29 +877,24 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return false;
- Type *LoadTy = LI->getType();
- Constant *C = GV->getInitializer();
-
// Bail for large initializers in excess of 4K to avoid too many scans.
+ Constant *C = GV->getInitializer();
uint64_t GVSize = DL.getTypeAllocSize(C->getType());
if (!GVSize || 4096 < GVSize)
return false;
- // Check whether pointer arrives back at Global Variable.
- // If PtrOp is neither GlobalVariable nor GEP, it might not arrive back at
- // GlobalVariable.
- // TODO: implement GEP handling
+ Type *LoadTy = LI->getType();
unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
- // TODO: Determine stride based on GEPs.
- APInt Stride(BW, 1);
- APInt ConstOffset(BW, 0);
+ auto [Stride, ConstOffset] = getStrideAndModOffsetOfGEP(PtrOp, DL);
// Any possible offset is a multiple of the GEP stride, and any valid offset
// is a multiple of the load alignment, so checking only multiples of the
// larger of the two is sufficient to establish that the results are equal.
if (auto LA = LI->getAlign();
- LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value())
+ LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) {
+ ConstOffset = APInt(BW, 0);
Stride = APInt(BW, LA.value());
+ }
Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL);
if (!Ca)
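To sketch how getStrideAndModOffsetOfGEP handles a chained GEP (a hedged walk-through; @g and the value names are hypothetical):

@g = internal constant [3 x i32] [i32 65537, i32 65537, i32 65537], align 4

define i32 @chained_gep(i64 %idx) {
  ; The inner GEP adds a constant 2-byte offset; the outer GEP adds a
  ; variable offset with scale 4. Stride = 4 and ModOffset = 2 srem 4 = 2,
  ; so every reachable offset has the form 2 + 4k.
  %p1 = getelementptr inbounds i16, ptr @g, i64 1
  %p2 = getelementptr inbounds i32, ptr %p1, i64 %idx
  %v = load i32, ptr %p2, align 1
  ret i32 %v
}

Since the bytes of @g repeat with period 4 (little-endian 01 00 01 00), every such load reads 65537 and folds. Had the load alignment exceeded the stride (while not exceeding the global's alignment), the block above would instead reset ConstOffset to zero and use the alignment as the stride, since every valid offset is a multiple of the alignment.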
diff --git a/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll b/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll
index 7acc6109744ca..e43dad2ca0893 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll
@@ -12,6 +12,18 @@
@constpackedstruct = internal constant <{[8 x i8]}> <{[8 x i8] c"\01\00\01\00\01\00\01\00"}>, align 4
@conststruct = internal constant {i16, [8 x i8]} {i16 1, [8 x i8] c"\01\00\01\00\01\00\01\00"}, align 4
+%struct = type { i128 }
+@global = internal constant %struct { i128 1 }
+; TODO: this should be folded, but currently i128 is not folded.
+define i32 @no-gep-128-struct(i64 %idx){
+; CHECK-LABEL: @no-gep-128-struct(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @global, align 4
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = load i32, ptr @global, align 4
+ ret i32 %1
+}
+
define i8 @inbounds_gep_load_i8_align2(i64 %idx){
; CHECK-LABEL: @inbounds_gep_load_i8_align2(
; CHECK-NEXT: ret i8 1
@@ -48,47 +60,50 @@ define i8 @inbounds_gep_load_i8_align2_volatile(i64 %idx){
declare ptr @llvm.ptrmask.p0.i64(ptr , i64)
; can't be folded because ptrmask can change the pointer while preserving provenance
-define i8 @inbounds_gep_load_i8_align2_ptrmasked(i64 %idx, i64 %mask){
-; CHECK-LABEL: @inbounds_gep_load_i8_align2_ptrmasked(
-; CHECK-NEXT: ret i8 1
+; This also invalidates the GEP index analysis.
+define i8 @inbounds_gep_load_i16_align1_ptrmasked(i64 %idx, i64 %mask){
+; CHECK-LABEL: @inbounds_gep_load_i16_align1_ptrmasked(
+; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr @constarray1, i64 [[MASK:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDX:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
+; CHECK-NEXT: ret i8 [[TMP3]]
;
%1 = call ptr @llvm.ptrmask.p0.i64(ptr @constarray1, i64 %mask)
- %2 = getelementptr inbounds i8, ptr %1, i64 %idx
- %3 = load i8, ptr %2, align 2
+ %2 = getelementptr inbounds i16, ptr %1, i64 %idx
+ %3 = load i8, ptr %2, align 1
ret i8 %3
}
-; TODO: this will be ret i32 65537(LE), 16777472(BE)
define i32 @inbounds_gep_i16_load_i32_align1(i64 %idx){
-; CHECK-LABEL: @inbounds_gep_i16_load_i32_align1(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray1, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1
-; CHECK-NEXT: ret i32 [[TMP2]]
+; LE-LABEL: @inbounds_gep_i16_load_i32_align1(
+; LE-NEXT: ret i32 65537
+;
+; BE-LABEL: @inbounds_gep_i16_load_i32_align1(
+; BE-NEXT: ret i32 16777472
;
%1 = getelementptr inbounds i16, ptr @constarray1, i64 %idx
%2 = load i32, ptr %1, align 1
ret i32 %2
}
-; TODO: this will be ret i32 65537(LE), 16777472(BE)
define i32 @inbounds_gep_i32_load_i32_align8(i64 %idx){
-; CHECK-LABEL: @inbounds_gep_i32_load_i32_align8(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr @constarray1, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8
-; CHECK-NEXT: ret i32 [[TMP2]]
+; LE-LABEL: @inbounds_gep_i32_load_i32_align8(
+; LE-NEXT: ret i32 65537
+;
+; BE-LABEL: @inbounds_gep_i32_load_i32_align8(
+; BE-NEXT: ret i32 16777472
;
%1 = getelementptr inbounds i32, ptr @constarray1, i64 %idx
%2 = load i32, ptr %1, align 8
ret i32 %2
}
-; TODO: this will be ret i32 65547(LE), 16777472(BE)
define i32 @inbounds_gep_i32_load_i32_const_offset(i64 %idx){
-; CHECK-LABEL: @inbounds_gep_i32_load_i32_const_offset(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray2, i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-; CHECK-NEXT: ret i32 [[TMP3]]
+; LE-LABEL: @inbounds_gep_i32_load_i32_const_offset(
+; LE-NEXT: ret i32 65537
+;
+; BE-LABEL: @inbounds_gep_i32_load_i32_const_offset(
+; BE-NEXT: ret i32 16777472
;
%1 = getelementptr inbounds i16, ptr @constarray2, i64 1
%2 = getelementptr inbounds i32, ptr %1, i64 %idx
@@ -125,13 +140,9 @@ define i32 @gep_load_i32_align2_const_offset_wrap(i64 %idx){
ret i32 %3
}
-; TODO: this will be ret i32 42
define i32 @inbounds_gep_i32_load_i32_const_ptr_array(i64 %idx){
; CHECK-LABEL: @inbounds_gep_i32_load_i32_const_ptr_array(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr @constptrarray, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-; CHECK-NEXT: ret i32 [[TMP3]]
+; CHECK-NEXT: ret i32 42
;
%1 = getelementptr inbounds ptr, ptr @constptrarray, i64 %idx
%2 = load ptr, ptr %1, align 4
@@ -163,16 +174,12 @@ define i32 @inbounds_gep_i8_load_i32_align1_packedstruct(i64 %idx){
ret i32 %2
}
-; TODO: this coould be folded into 65537(LE), 16777472(BE)
define i32 @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(i64 %idx){
; LE-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(
; LE-NEXT: ret i32 65537
;
; BE-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(
-; BE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @conststruct, i64 1
-; BE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]]
-; BE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-; BE-NEXT: ret i32 [[TMP3]]
+; BE-NEXT: ret i32 16777472
;
%1 = getelementptr inbounds i16, ptr @conststruct, i64 1
%2 = getelementptr inbounds i32, ptr %1, i64 %idx
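A last hedged sketch covering the non-inbounds rule in getStrideAndModOffsetOfGEP (the global and names are hypothetical): without inbounds, the byte offset is only known modulo 2^64, so of an index scale like 12 only its largest power-of-two factor, gcd(12, 2^64) = 4, can serve as the stride.

@g2 = internal constant [6 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4

define i32 @no_inbounds_gep(i64 %idx) {
  ; The [3 x i32] element type gives an index scale of 12 bytes, but the GEP
  ; may wrap, so only the power-of-two factor 4 is kept as the stride. Every
  ; i32 at a multiple-of-4 offset in @g2 is 1, so this still folds: ret i32 1
  %p = getelementptr [3 x i32], ptr @g2, i64 %idx
  %v = load i32, ptr %p, align 4
  ret i32 %v
}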