[llvm] [GlobalOpt] Add range metadata to loads from constant global variables (PR #127695)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 20 14:37:12 PST 2025


https://github.com/Ralender updated https://github.com/llvm/llvm-project/pull/127695

>From ad4e92e56d81c1d102a035eb798fda94ca859cda Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Thu, 20 Feb 2025 23:35:19 +0100
Subject: [PATCH] [GlobalOpt] Add range metadata to loads from constant global
 variables

---
 llvm/lib/Transforms/IPO/GlobalOpt.cpp         | 232 ++++++++++
 .../GlobalOpt/add_range_metadata.ll           | 434 ++++++++++++++++++
 2 files changed, 666 insertions(+)
 create mode 100644 llvm/test/Transforms/GlobalOpt/add_range_metadata.ll

diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 9586fc97a39f7..2b05c9062ad58 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -45,6 +45,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
@@ -2498,6 +2499,233 @@ OptimizeGlobalAliases(Module &M,
   return Changed;
 }
 
+struct AccessPattern {
+  Type* Ty;
+
+  APInt Stride;
+  APInt Offset;
+};
+
+template <> struct DenseMapInfo<AccessPattern> {
+  static inline AccessPattern getEmptyKey() {
+    return {(Type *)1, APInt(), APInt()};
+  }
+  static inline AccessPattern getTombstoneKey() {
+    return {(Type *)2, APInt(), APInt()};
+  }
+  static unsigned getHashValue(const AccessPattern &AP) {
+    return hash_combine(AP.Ty, AP.Stride, AP.Offset);
+  }
+  static bool isEqual(const AccessPattern &LHS, const AccessPattern &RHS) {
+    return LHS.Ty == RHS.Ty && LHS.Stride == RHS.Stride &&
+           LHS.Offset == RHS.Offset;
+  }
+};
+
+// Returns (gcd, x, y) such that a*x + b*y == gcd.
+std::tuple<APInt, APInt, APInt> ExtendedSignedGCD(APInt a, APInt b) {
+  unsigned BW = a.getBitWidth();
+  APInt x = APInt(BW, 1);
+  APInt y = APInt(BW, 0);
+  APInt x1 = APInt(BW, 0);
+  APInt y1 = APInt(BW, 1);
+
+  while (b != 0) {
+    APInt q = APInt(BW, 0);
+    APInt r = APInt(BW, 0);
+    APInt::sdivrem(a, b, q, r);
+    a = std::move(b);
+    b = std::move(r);
+
+    std::swap(x, x1);
+    std::swap(y, y1);
+    x1 -= q * x;
+    y1 -= q * y;
+  }
+  return {a, x, y};
+}
+
+// If possible, build a new pair of Stride and Offset that describes a subset
+// of the original access pattern whose offsets are all aligned.
+std::optional<std::pair<APInt, APInt>>
+AlignStrideAndOffset(const APInt &Stride, const APInt &Offset,
+                     const APInt &Align) {
+  // Offset * Align (a multiple of Align) is added only to keep Missing non-negative.
+  APInt Missing = ((Offset * Align) - Offset).urem(Align);
+
+  // Fast path for the common case.
+  if (Missing == 0)
+    return {
+        {(Stride * Align).udiv(APIntOps::GreatestCommonDivisor(Stride, Align)),
+         Offset}};
+
+  auto [GCD, X, Y] = ExtendedSignedGCD(Stride, Align);
+  assert(APIntOps::GreatestCommonDivisor(Stride, Align) == GCD);
+  assert((X * Stride + Y * Align) == GCD);
+
+  if (Missing.urem(GCD) != 0) {
+    // The new Stride + Offset cannot be created because there are no elements
+    // in the original that would be properly aligned.
+    return std::nullopt;
+  }
+
+  APInt StrideAlign = Stride * Align;
+  // X could be negative, so we need to use sdiv. StrideAlign * Align (a
+  // multiple of Align) is added only to keep the urem operand non-negative.
+  APInt NumStride =
+      (((Missing * X).sdiv(GCD)) + (StrideAlign * Align)).urem(Align);
+
+  APInt NewOffset = Offset + (NumStride * Stride);
+  APInt NewStride = StrideAlign.udiv(GCD);
+  return {{std::move(NewStride), std::move(NewOffset)}};
+}
+
+static bool addRangeMetadata(Module &M) {
+  const DataLayout &DL = M.getDataLayout();
+  bool Changed = false;
+
+  for (GlobalValue &Global : M.global_values()) {
+
+    auto *GV = dyn_cast<GlobalVariable>(&Global);
+    if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+      continue;
+
+    // Wrapped in a lambda so we can skip to the next GlobalVariable with a return.
+    [&] {
+      unsigned IndexBW = DL.getIndexTypeSizeInBits(GV->getType());
+
+      struct PointerInfo {
+        Value* Ptr;
+
+        // Zero denotes not set
+        APInt Stride;
+        APInt Offset;
+      };
+
+      // GEPs only take one pointer operand, the one we will come from, so we
+      // don't need to do any uniquing during the DFS.
+      SmallVector<PointerInfo> Stack;
+
+      // All loads of the global that this code can analyze grouped by access
+      // pattern. Loads with the same access pattern can access the same offsets
+      // in the global, so they can be treated the same.
+      SmallDenseMap<AccessPattern, SmallVector<LoadInst *>> LoadsByAccess;
+
+      Stack.push_back({GV, APInt(IndexBW, 0), APInt(IndexBW, 0)});
+
+      while (!Stack.empty()) {
+        PointerInfo Curr = Stack.pop_back_val();
+
+        if (!isa<GlobalVariable>(Curr.Ptr)) {
+          if (auto *LI = dyn_cast<LoadInst>(Curr.Ptr)) {
+
+            if (!LI->getType()->isIntegerTy())
+              continue;
+
+            if (LI->hasMetadata(LLVMContext::MD_range))
+              continue;
+
+            // This is an access at a fixed offset; we expect this to be
+            // handled elsewhere, so we skip it.
+            if (Curr.Stride == 0)
+              continue;
+
+            // This case is very rare and weird; it means that we don't know
+            // at runtime which offsets into the global arrays are safe to
+            // access with this load. We could run the following code while
+            // ignoring the alignment constraint from the load, but since this
+            // case is rare and weird, we simply give up.
+            if (LI->getAlign() > GV->getAlign().valueOrOne())
+              continue;
+
+            auto NewStrideAndOffset =
+                AlignStrideAndOffset(Curr.Stride, Curr.Offset,
+                                     APInt(IndexBW, LI->getAlign().value()));
+
+            if (!NewStrideAndOffset) {
+              // This load cannot access an offset with the correct alignment
+              LI->replaceAllUsesWith(PoisonValue::get(LI->getType()));
+              continue;
+            }
+
+            AccessPattern AP{LI->getType(), NewStrideAndOffset->first,
+                             NewStrideAndOffset->second};
+            assert(AP.Stride != 0);
+            LoadsByAccess[AP].push_back(LI);
+            continue;
+          }
+          auto *GEP = dyn_cast<GetElementPtrInst>(Curr.Ptr);
+          if (!GEP)
+            continue;
+
+          SmallMapVector<Value *, APInt, 4> VarOffsets;
+          if (!GEP->collectOffset(DL, IndexBW, VarOffsets, Curr.Offset))
+            break;
+
+          for (auto [V, Scale] : VarOffsets) {
+
+            // Commented out because I don't understand why we would need
+            // this, but it was part of getStrideAndModOffsetOfGEP:
+            // // Only keep a power of two factor for non-inbounds
+            // if (!GEP->isInBounds())
+            //   Scale =
+            //       APInt::getOneBitSet(Scale.getBitWidth(),
+            //       Scale.countr_zero());
+
+            if (Curr.Stride == 0)
+              Curr.Stride = Scale;
+            else
+              Curr.Stride = APIntOps::GreatestCommonDivisor(Curr.Stride, Scale);
+          }
+        }
+
+        for (User *U : Curr.Ptr->users()) {
+          if (isa<LoadInst, GetElementPtrInst>(U)) {
+            Curr.Ptr = U;
+            Stack.push_back(Curr);
+          }
+        }
+      }
+
+      for (auto [AP, Loads] : LoadsByAccess) {
+        {
+          APInt SMin = APInt::getSignedMaxValue(AP.Ty->getIntegerBitWidth());
+          APInt SMax = APInt::getSignedMinValue(AP.Ty->getIntegerBitWidth());
+
+          APInt LastValidOffset =
+              APInt(IndexBW, DL.getTypeAllocSize(GV->getValueType()) -
+                                 DL.getTypeStoreSize(AP.Ty));
+          for (APInt Offset = AP.Offset; Offset.ule(LastValidOffset);
+               Offset += AP.Stride) {
+            assert(Offset.isAligned(Loads[0]->getAlign()));
+            Constant *Cst = ConstantFoldLoadFromConstPtr(GV, AP.Ty, Offset, DL);
+
+            if (!Cst)
+              // Lambda capture of a structured binding is only available
+              // starting in C++20, so we skip to the next element with goto.
+              goto NextGroup;
+
+            // MD_range is order-agnostic.
+            SMin = APIntOps::smin(SMin, Cst->getUniqueInteger());
+            SMax = APIntOps::smax(SMax, Cst->getUniqueInteger());
+          }
+
+          MDBuilder MDHelper(M.getContext());
+
+          // The Range is allowed to wrap
+          MDNode *RNode = MDHelper.createRange(SMin, SMax + 1);
+          for (LoadInst *LI : Loads)
+            LI->setMetadata(LLVMContext::MD_range, RNode);
+          Changed = true;
+        }
+      NextGroup:
+        (void)0; // A label must be followed by a statement.
+      }
+    }();
+  }
+  return Changed;
+}
+
 static Function *
 FindAtExitLibFunc(Module &M,
                   function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2887,6 +3115,10 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL,
     Changed |= LocalChange;
   }
 
+  // Add range metadata to loads from constant global variables based on the
+  // values that could be loaded from the variable
+  Changed |= addRangeMetadata(M);
+
   // TODO: Move all global ctors functions to the end of the module for code
   // layout.
 
diff --git a/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll b/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll
new file mode 100644
index 0000000000000..b936e22a83edc
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll
@@ -0,0 +1,434 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p globalopt -S %s | FileCheck %s
+
+ at gvar0 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 -5, i64 1, i64 10, [253 x i64] zeroinitializer }> }, align 8
+ at gvar1 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 0, i64 1, i64 5, [253 x i64] zeroinitializer }> }, align 8
+ at gvar2 = global [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 1], align 16
+ at gvar3 = constant [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 8388608], align 16
+ at gvar5 = constant [2 x [6 x i8]] [[6 x i8] c"\01a_\02-0", [6 x i8] c" \0E\FF\07\08\09"], align 1
+
+%struct.A = type { i32, i8, ptr, i16, i8 }
+ at gvar6 = constant [2 x [2 x %struct.A]] [[2 x %struct.A] [%struct.A { i32 8, i8 97, ptr null, i16 9, i8 12 }, %struct.A { i32 -1, i8 107, ptr null, i16 7, i8 0 }], [2 x %struct.A] [%struct.A { i32 16, i8 46, ptr null, i16 59, i8 95 }, %struct.A { i32 0, i8 0, ptr null, i16 49, i8 100 }]], align 16
+%struct.B = type <{ i32, i8, ptr, i16, i8 }>
+ at gvar7 = constant [2 x [2 x %struct.B]] [[2 x %struct.B] [%struct.B <{ i32 8, i8 97, ptr null, i16 9, i8 12 }>, %struct.B <{ i32 -1, i8 107, ptr null, i16 7, i8 0 }>], [2 x %struct.B] [%struct.B <{ i32 16, i8 46, ptr null, i16 59, i8 95 }>, %struct.B <{ i32 0, i8 0, ptr null, i16 49, i8 100 }>]], align 32
+%struct.C = type { i32, i32, i32 }
+ at gvar8 = constant [34 x %struct.C] [%struct.C { i32 0, i32 1, i32 2 }, %struct.C { i32 3, i32 4, i32 5 }, %struct.C { i32 6, i32 7, i32 8 }, %struct.C { i32 9, i32 10, i32 11 }, %struct.C { i32 12, i32 13, i32 14 }, %struct.C { i32 15, i32 16, i32 17 }, %struct.C { i32 18, i32 19, i32 20 }, %struct.C { i32 21, i32 22, i32 23 }, %struct.C { i32 24, i32 25, i32 26 }, %struct.C { i32 27, i32 28, i32 29 }, %struct.C { i32 30, i32 31, i32 32 }, %struct.C { i32 33, i32 34, i32 35 }, %struct.C { i32 36, i32 37, i32 38 }, %struct.C { i32 39, i32 40, i32 41 }, %struct.C { i32 42, i32 43, i32 44 }, %struct.C { i32 45, i32 46, i32 47 }, %struct.C { i32 48, i32 49, i32 50 }, %struct.C { i32 51, i32 52, i32 53 }, %struct.C { i32 54, i32 55, i32 56 }, %struct.C { i32 57, i32 58, i32 59 }, %struct.C { i32 60, i32 61, i32 62 }, %struct.C { i32 63, i32 64, i32 65 }, %struct.C { i32 66, i32 67, i32 68 }, %struct.C { i32 69, i32 70, i32 71 }, %struct.C { i32 72, i32 73, i32 74 }, %struct.C { i32 75, i32 76, i32 77 }, %struct.C { i32 78, i32 79, i32 80 }, %struct.C { i32 81, i32 82, i32 83 }, %struct.C { i32 84, i32 85, i32 86 }, %struct.C { i32 87, i32 88, i32 89 }, %struct.C { i32 90, i32 91, i32 92 }, %struct.C { i32 93, i32 94, i32 95 }, %struct.C { i32 96, i32 97, i32 98 }, %struct.C { i32 99, i32 100, i32 101 }], align 16, align 256
+ at gvar9 = constant [6 x [18 x i8]] [[18 x i8] c"\00\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10\11", [18 x i8] c"\12\13\14\15\16\17\18\19\1A\1B\1C\1D\1E\1F !\22#", [18 x i8] c"$%&'()*+,-./012345", [18 x i8] c"6789:;<=>?@ABCDEFG", [18 x i8] c"HIJKLMNOPQRSTUVWXY", [18 x i8] c"Z[\\]^_`abcdefghijk"], align 16
+
+define i64 @test_basic0(i64 %3) {
+; CHECK-LABEL: define i64 @test_basic0(
+; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
+  %ptr = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 %3
+  %5 = load i64, ptr %ptr, align 8
+  ret i64 %5
+}
+
+define i64 @test_basic1(i64 %3) {
+; CHECK-LABEL: define i64 @test_basic1(
+; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
+  %ptr = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 %3
+  %5 = load i64, ptr %ptr, align 8
+  ret i64 %5
+}
+
+define i32 @test_different_type(i64 %3) {
+; CHECK-LABEL: define i32 @test_different_type(
+; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8, !range [[RNG1:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %ptr = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 %3
+  %5 = load i32, ptr %ptr, align 8
+  ret i32 %5
+}
+
+define i32 @test_non_constant(i64 %3) {
+; CHECK-LABEL: define i32 @test_non_constant(
+; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds [8 x i32], ptr @gvar2, i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %ptr = getelementptr inbounds [8 x i32], ptr @gvar2, i64 0, i64 %3
+  %5 = load i32, ptr %ptr, align 8
+  ret i32 %5
+}
+
+define i64 @test_other(i8 %first_idx) {
+; CHECK-LABEL: define i64 @test_other(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar3, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8, !range [[RNG2:![0-9]+]]
+; CHECK-NEXT:    ret i64 [[TMP0]]
+;
+entry:
+  %idxprom = zext i8 %first_idx to i64
+  %arrayidx = getelementptr inbounds i64, ptr @gvar3, i64 %idxprom
+  %0 = load i64, ptr %arrayidx, align 8
+  ret i64 %0
+}
+
+; This could be supported but is rare and more complex, so for now we don't process it.
+define i64 @test_multiple_types0(i8 %first_idx) {
+; CHECK-LABEL: define i64 @test_multiple_types0(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar3, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8, !range [[RNG2]]
+; CHECK-NEXT:    ret i64 [[TMP0]]
+;
+entry:
+  %idxprom = zext i8 %first_idx to i64
+  %arrayidx = getelementptr inbounds i64, ptr @gvar3, i64 %idxprom
+  %0 = load i64, ptr %arrayidx, align 8
+  ret i64 %0
+}
+
+define i32 @test_multiple_types1(i8 %first_idx) {
+; CHECK-LABEL: define i32 @test_multiple_types1(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr @gvar3, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 8, !range [[RNG3:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %idxprom = zext i8 %first_idx to i64
+  %arrayidx = getelementptr inbounds i32, ptr @gvar3, i64 %idxprom
+  %0 = load i32, ptr %arrayidx, align 8
+  ret i32 %0
+}
+
+define i32 @test_overaligned_load(i8 %first_idx) {
+; CHECK-LABEL: define i32 @test_overaligned_load(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr @gvar3, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 32
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %idxprom = zext i8 %first_idx to i64
+  %arrayidx = getelementptr inbounds i32, ptr @gvar3, i64 %idxprom
+  %0 = load i32, ptr %arrayidx, align 32
+  ret i32 %0
+}
+
+; This could also be supported, but for now it is not.
+define dso_local signext i8 @multi_dimentional0(i8 zeroext %0, i8 zeroext %1) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local signext i8 @multi_dimentional0(
+; CHECK-SAME: i8 zeroext [[TMP0:%.*]], i8 zeroext [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 [[TMP3]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1, !range [[RNG4:![0-9]+]]
+; CHECK-NEXT:    ret i8 [[TMP6]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = zext i8 %1 to i64
+  %5 = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 %3, i64 %4
+  %6 = load i8, ptr %5, align 1
+  ret i8 %6
+}
+
+define i64 @test_complex0(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_complex0(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 [[TMP3]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8, !range [[RNG5:![0-9]+]]
+; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
+; CHECK-NEXT:    ret i64 [[TMP7]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = zext i8 %1 to i64
+  %5 = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 %3, i64 %4
+  %6 = load i32, ptr %5, align 8
+  %7 = sext i32 %6 to i64
+  ret i64 %7
+}
+
+define i64 @test_multi_gep(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_multi_gep(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 [[TMP3]]
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x %struct.A], ptr [[GEP0]], i64 0, i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[GEP1]], align 16, !range [[RNG6:![0-9]+]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x %struct.A], ptr [[GEP0]], i64 0, i64 [[TMP4]]
+; CHECK-NEXT:    [[B6:%.*]] = load i32, ptr [[GEP2]], align 4, !range [[RNG5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT:    ret i64 [[TMP6]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = zext i8 %1 to i64
+  %gep0 = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 %3
+  %gep1 = getelementptr inbounds [2 x %struct.A], ptr %gep0, i64 0, i64 %4
+  %6 = load i32, ptr %gep1, align 16
+  %gep2 = getelementptr inbounds [2 x %struct.A], ptr %gep0, i64 0, i64 %4
+  %b6 = load i32, ptr %gep2, align 4
+  %7 = sext i32 %6 to i64
+  ret i64 %7
+}
+
+define i64 @test_complex1(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_complex1(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 [[TMP3]], i64 [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8, !range [[RNG7:![0-9]+]]
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i16 [[TMP6]] to i64
+; CHECK-NEXT:    ret i64 [[TMP7]]
+;
+  %4 = zext i8 %0 to i64
+  %5 = zext i8 %1 to i64
+  %6 = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 %4, i64 %5, i32 3
+  %7 = load i16, ptr %6, align 8
+  %8 = zext i16 %7 to i64
+  ret i64 %8
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind sspstrong willreturn memory(none) uwtable
+define i64 @test_packed_struct0(i8 zeroext %0, i8 zeroext %1) {
+; CHECK-LABEL: define i64 @test_packed_struct0(
+; CHECK-SAME: i8 zeroext [[TMP0:%.*]], i8 zeroext [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 [[TMP3]], i64 [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 1, !range [[RNG7]]
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i16 [[TMP6]] to i64
+; CHECK-NEXT:    ret i64 [[TMP7]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = zext i8 %1 to i64
+  %5 = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 %3, i64 %4, i32 3
+  %6 = load i16, ptr %5, align 1
+  %7 = zext i16 %6 to i64
+  ret i64 %7
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind sspstrong willreturn memory(none) uwtable
+define i64 @test_packed_struct_aligned(i8 zeroext %0, i8 zeroext %1) {
+; CHECK-LABEL: define i64 @test_packed_struct_aligned(
+; CHECK-SAME: i8 zeroext [[TMP0:%.*]], i8 zeroext [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 [[TMP3]], i64 [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i16 poison to i64
+; CHECK-NEXT:    ret i64 [[TMP7]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = zext i8 %1 to i64
+  %5 = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 %3, i64 %4, i32 3
+  %6 = load i16, ptr %5, align 8
+  %7 = zext i16 %6 to i64
+  ret i64 %7
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind sspstrong willreturn memory(none) uwtable
+define i64 @test_packed_struct_aligned2(i8 zeroext %0, i8 zeroext %1) {
+; CHECK-LABEL: define i64 @test_packed_struct_aligned2(
+; CHECK-SAME: i8 zeroext [[TMP0:%.*]], i8 zeroext [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 [[TMP3]], i64 [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 16
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i16 poison to i64
+; CHECK-NEXT:    ret i64 [[TMP7]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = zext i8 %1 to i64
+  %5 = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 %3, i64 %4, i32 3
+  %6 = load i16, ptr %5, align 16
+  %7 = zext i16 %6 to i64
+  ret i64 %7
+}
+
+define i64 @test_alignment_stride0(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_alignment_stride0(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr @gvar8, i64 32
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [100 x %struct.C], ptr [[GEP0]], i64 0, i64 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !range [[RNG8:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT:    ret i64 [[TMP6]]
+;
+  %3 = zext i8 %0 to i64
+  %gep0 = getelementptr inbounds i32, ptr @gvar8, i64 32
+  %4 = getelementptr inbounds [100 x %struct.C], ptr %gep0, i64 0, i64 %3, i32 0
+  %5 = load i32, ptr %4, align 4
+  %6 = sext i32 %5 to i64
+  ret i64 %6
+}
+
+define i64 @test_alignment_stride1(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_alignment_stride1(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr @gvar8, i64 32
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [100 x %struct.C], ptr [[GEP0]], i64 0, i64 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8, !range [[RNG9:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT:    ret i64 [[TMP6]]
+;
+  %3 = zext i8 %0 to i64
+  %gep0 = getelementptr inbounds i32, ptr @gvar8, i64 32
+  %4 = getelementptr inbounds [100 x %struct.C], ptr %gep0, i64 0, i64 %3, i32 0
+  %5 = load i32, ptr %4, align 8
+  %6 = sext i32 %5 to i64
+  ret i64 %6
+}
+
+define i64 @test_alignment_stride2(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_alignment_stride2(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr @gvar8, i64 32
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [100 x %struct.C], ptr [[GEP0]], i64 0, i64 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 16, !range [[RNG10:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT:    ret i64 [[TMP6]]
+;
+  %3 = zext i8 %0 to i64
+  %gep0 = getelementptr inbounds i32, ptr @gvar8, i64 32
+  %4 = getelementptr inbounds [100 x %struct.C], ptr %gep0, i64 0, i64 %3, i32 0
+  %5 = load i32, ptr %4, align 16
+  %6 = sext i32 %5 to i64
+  ret i64 %6
+}
+
+define i64 @test_alignment_stride3(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_alignment_stride3(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr @gvar8, i64 32
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [100 x %struct.C], ptr [[GEP0]], i64 0, i64 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 32, !range [[RNG11:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT:    ret i64 [[TMP6]]
+;
+  %3 = zext i8 %0 to i64
+  %gep0 = getelementptr inbounds i32, ptr @gvar8, i64 32
+  %4 = getelementptr inbounds [100 x %struct.C], ptr %gep0, i64 0, i64 %3, i32 0
+  %5 = load i32, ptr %4, align 32
+  %6 = sext i32 %5 to i64
+  ret i64 %6
+}
+
+define i64 @test_strides(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [6 x [18 x i8]], ptr @gvar9, i64 0, i64 [[TMP2]], i64 14
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8, !range [[RNG12:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sext i8 [[TMP4]] to i64
+; CHECK-NEXT:    ret i64 [[TMP5]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = getelementptr inbounds [6 x [18 x i8]], ptr @gvar9, i64 0, i64 %3, i64 14
+  %5 = load i8, ptr %4, align 8
+  %6 = sext i8 %5 to i64
+  ret i64 %6
+}
+
+define i64 @test_strides_poison(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides_poison(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [6 x [18 x i8]], ptr @gvar9, i64 0, i64 [[TMP2]], i64 7
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = sext i8 poison to i64
+; CHECK-NEXT:    ret i64 [[TMP5]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = getelementptr inbounds [6 x [18 x i8]], ptr @gvar9, i64 0, i64 %3, i64 7
+  %5 = load i8, ptr %4, align 8
+  %6 = sext i8 %5 to i64
+  ret i64 %6
+}
+
+define i64 @test_strides2(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides2(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [5 x [19 x i8]], ptr @gvar9, i64 0, i64 [[TMP2]], i64 8
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8, !range [[RNG13:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sext i8 [[TMP4]] to i64
+; CHECK-NEXT:    ret i64 [[TMP5]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = getelementptr inbounds [5 x [19 x i8]], ptr @gvar9, i64 0, i64 %3, i64 8
+  %5 = load i8, ptr %4, align 8
+  %6 = sext i8 %5 to i64
+  ret i64 %6
+}
+
+define i64 @test_strides3(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides3(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [12 x [9 x i8]], ptr @gvar9, i64 0, i64 2, i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4, !range [[RNG14:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sext i8 [[TMP4]] to i64
+; CHECK-NEXT:    ret i64 [[TMP5]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = getelementptr inbounds [12 x [9 x i8]], ptr @gvar9, i64 0, i64 2, i64 %3
+  %5 = load i8, ptr %4, align 4
+  %6 = sext i8 %5 to i64
+  ret i64 %6
+}
+
+define i64 @test_strides4(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides4(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [4 x [27 x i8]], ptr @gvar9, i64 0, i64 [[TMP2]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 2, !range [[RNG15:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sext i16 [[TMP4]] to i64
+; CHECK-NEXT:    ret i64 [[TMP5]]
+;
+  %3 = zext i8 %0 to i64
+  %4 = getelementptr inbounds [4 x [27 x i8]], ptr @gvar9, i64 0, i64 %3, i64 2
+  %5 = load i16, ptr %4, align 2
+  %6 = sext i16 %5 to i64
+  ret i64 %6
+}
+
+;.
+; CHECK: [[RNG0]] = !{i64 -5, i64 11}
+; CHECK: [[RNG1]] = !{i32 0, i32 6}
+; CHECK: [[RNG2]] = !{i64 2, i64 36028801313924476}
+; CHECK: [[RNG3]] = !{i32 -6789, i32 3}
+; CHECK: [[RNG4]] = !{i8 -1, i8 98}
+; CHECK: [[RNG5]] = !{i32 -1, i32 17}
+; CHECK: [[RNG6]] = !{i32 8, i32 17}
+; CHECK: [[RNG7]] = !{i16 7, i16 60}
+; CHECK: [[RNG8]] = !{i32 32, i32 102}
+; CHECK: [[RNG9]] = !{i32 32, i32 99}
+; CHECK: [[RNG10]] = !{i32 32, i32 93}
+; CHECK: [[RNG11]] = !{i32 32, i32 81}
+; CHECK: [[RNG12]] = !{i8 32, i8 105}
+; CHECK: [[RNG13]] = !{i8 8, i8 9}
+; CHECK: [[RNG14]] = !{i8 20, i8 105}
+; CHECK: [[RNG15]] = !{i16 770, i16 14649}
+;.



More information about the llvm-commits mailing list