[llvm] [GlobalOpt] Add range metadata to loads from constant global variables (PR #127695)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 20 14:37:12 PST 2025
https://github.com/Ralender updated https://github.com/llvm/llvm-project/pull/127695
>From ad4e92e56d81c1d102a035eb798fda94ca859cda Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Thu, 20 Feb 2025 23:35:19 +0100
Subject: [PATCH] [GlobalOpt] Add range metadata to loads from constant global
variables
---
llvm/lib/Transforms/IPO/GlobalOpt.cpp | 232 ++++++++++
.../GlobalOpt/add_range_metadata.ll | 434 ++++++++++++++++++
2 files changed, 666 insertions(+)
create mode 100644 llvm/test/Transforms/GlobalOpt/add_range_metadata.ll
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 9586fc97a39f7..2b05c9062ad58 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
@@ -2498,6 +2499,233 @@ OptimizeGlobalAliases(Module &M,
return Changed;
}
+// Key describing how a set of loads accesses a constant global: the loaded
+// integer type plus the stride and offset (in bytes) of the addresses the
+// loads can reach, i.e. every reachable address is Offset + k * Stride.
+// Loads with an identical access pattern can observe the same set of values
+// and therefore share the same !range metadata.
+struct AccessPattern {
+  Type *Ty;
+
+  APInt Stride;
+  APInt Offset;
+};
+
+// DenseMapInfo so AccessPattern can be used as a DenseMap key.  The explicit
+// specialization must live in namespace llvm (declaring it at global scope,
+// where the primary template is only visible through a using-directive, is
+// ill-formed).  The sentinel keys carry their tag in Ty; their APInt members
+// stay default-constructed and are never compared because isEqual compares
+// Ty first and short-circuits.
+namespace llvm {
+template <> struct DenseMapInfo<AccessPattern> {
+  static inline AccessPattern getEmptyKey() {
+    return {(Type *)1, APInt(), APInt()};
+  }
+  static inline AccessPattern getTombstoneKey() {
+    return {(Type *)2, APInt(), APInt()};
+  }
+  static unsigned getHashValue(const AccessPattern &AP) {
+    return hash_combine(AP.Ty, AP.Stride, AP.Offset);
+  }
+  static bool isEqual(const AccessPattern &LHS, const AccessPattern &RHS) {
+    return LHS.Ty == RHS.Ty && LHS.Stride == RHS.Stride &&
+           LHS.Offset == RHS.Offset;
+  }
+};
+} // namespace llvm
+
+// Extended Euclidean algorithm on signed APInts: returns (gcd, x, y) such
+// that a*x + b*y == gcd.  All returned values use a's bitwidth; a and b must
+// have the same bitwidth.  Marked static: file-local helper.
+static std::tuple<APInt, APInt, APInt> ExtendedSignedGCD(APInt a, APInt b) {
+  unsigned BW = a.getBitWidth();
+  // Bezout coefficients for a (x, x1) and b (y, y1), updated in lockstep
+  // with the remainder sequence.
+  APInt x = APInt(BW, 1);
+  APInt y = APInt(BW, 0);
+  APInt x1 = APInt(BW, 0);
+  APInt y1 = APInt(BW, 1);
+
+  while (b != 0) {
+    APInt q = APInt(BW, 0);
+    APInt r = APInt(BW, 0);
+    APInt::sdivrem(a, b, q, r);
+    a = std::move(b);
+    b = std::move(r);
+
+    std::swap(x, x1);
+    std::swap(y, y1);
+    x1 -= q * x;
+    y1 -= q * y;
+  }
+  return {a, x, y};
+}
+
+// Try to refine (Stride, Offset) into a new (Stride, Offset) pair that
+// describes exactly the subset of the original addresses Offset + k * Stride
+// that are aligned to Align.  Returns std::nullopt when no address of that
+// form is aligned.  Align is the load's alignment (a power of two); all
+// APInts must share one bitwidth.  Marked static: file-local helper.
+static std::optional<std::pair<APInt, APInt>>
+AlignStrideAndOffset(const APInt &Stride, const APInt &Offset,
+                     const APInt &Align) {
+  // Here Offset * Align is added only to make sure Missing is positive or zero
+  APInt Missing = ((Offset * Align) - Offset).urem(Align);
+
+  // Fast path for the common case where Offset is already aligned.
+  if (Missing == 0)
+    return {
+        {(Stride * Align).udiv(APIntOps::GreatestCommonDivisor(Stride, Align)),
+         Offset}};
+
+  auto [GCD, X, Y] = ExtendedSignedGCD(Stride, Align);
+  assert(APIntOps::GreatestCommonDivisor(Stride, Align) == GCD);
+  assert((X * Stride + Y * Align) == GCD);
+
+  if (Missing.urem(GCD) != 0) {
+    // No multiple of Stride can make up the Missing bytes, so no element of
+    // the original address set is properly aligned.
+    return std::nullopt;
+  }
+
+  APInt StrideAlign = Stride * Align;
+  // X could be negative, so we need to use sdiv.
+  // StrideAlign * Align is a multiple of Align added only to make the value
+  // non-negative before urem (the residue mod Align is unchanged).
+  APInt NumStride =
+      (((Missing * X).sdiv(GCD)) + (StrideAlign * Align)).urem(Align);
+
+  APInt NewOffset = Offset + (NumStride * Stride);
+  APInt NewStride = StrideAlign.udiv(GCD);
+  return {{std::move(NewStride), std::move(NewOffset)}};
+}
+
+// Attach !range metadata to integer loads whose address is a (chain of) GEPs
+// into a constant global with a definitive initializer.  For each load we
+// describe the byte offsets it can access as Offset + k * Stride, constant
+// fold the value at every in-bounds offset, and attach the resulting
+// [min, max] as a range.  Returns true iff any IR was modified.
+static bool addRangeMetadata(Module &M) {
+  const DataLayout &DL = M.getDataLayout();
+  bool Changed = false;
+
+  for (GlobalValue &Global : M.global_values()) {
+
+    auto *GV = dyn_cast<GlobalVariable>(&Global);
+    if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+      continue;
+
+    // Wrapped in a lambda to be able to go to the next GlobalVariable with a
+    // return.
+    [&] {
+      unsigned IndexBW = DL.getIndexTypeSizeInBits(GV->getType());
+
+      // A pointer derived from GV plus the stride/offset (in bytes)
+      // describing the addresses it may designate.
+      struct PointerInfo {
+        Value *Ptr;
+
+        // Zero denotes not set
+        APInt Stride;
+        APInt Offset;
+      };
+
+      // GEPs only take one pointer operand, the one we will come from, so we
+      // dont need to do uniqueing during the DFS
+      SmallVector<PointerInfo> Stack;
+
+      // All loads of the global that this code can analyze grouped by access
+      // pattern. Loads with the same access pattern can access the same offsets
+      // in the global, so they can be treated the same.
+      SmallDenseMap<AccessPattern, SmallVector<LoadInst *>> LoadsByAccess;
+
+      Stack.push_back({GV, APInt(IndexBW, 0), APInt(IndexBW, 0)});
+
+      while (!Stack.empty()) {
+        PointerInfo Curr = Stack.pop_back_val();
+
+        if (!isa<GlobalVariable>(Curr.Ptr)) {
+          if (auto *LI = dyn_cast<LoadInst>(Curr.Ptr)) {
+
+            // Only integer loads can carry MD_range.
+            if (!LI->getType()->isIntegerTy())
+              continue;
+
+            if (LI->hasMetadata(LLVMContext::MD_range))
+              continue;
+
+            // This is an access at a fixed offset, I expect this is handled
+            // elsewhere so we skip it.
+            if (Curr.Stride == 0)
+              continue;
+
+            // This case is very rare and weird, but what it means is that we
+            // dont know at runtime what offsets into the Global arrays are safe
+            // to access with this load. So we could run the following code
+            // ignoring alignment constraint from the load. but this case is
+            // rare and weird so we give-up.
+            if (LI->getAlign() > GV->getAlign().valueOrOne())
+              continue;
+
+            auto NewStrideAndOffset =
+                AlignStrideAndOffset(Curr.Stride, Curr.Offset,
+                                     APInt(IndexBW, LI->getAlign().value()));
+
+            if (!NewStrideAndOffset) {
+              // This load cannot access an offset with the correct alignment,
+              // so every execution is UB.  This mutates the IR, so Changed
+              // must be reported.
+              LI->replaceAllUsesWith(PoisonValue::get(LI->getType()));
+              Changed = true;
+              continue;
+            }
+
+            AccessPattern AP{LI->getType(), NewStrideAndOffset->first,
+                             NewStrideAndOffset->second};
+            assert(AP.Stride != 0);
+            LoadsByAccess[AP].push_back(LI);
+            continue;
+          }
+          auto *GEP = dyn_cast<GetElementPtrInst>(Curr.Ptr);
+          if (!GEP)
+            continue;
+
+          SmallMapVector<Value *, APInt, 4> VarOffsets;
+          // If this GEP cannot be modelled, give up on this pointer only;
+          // pointers and loads discovered through other paths are unaffected.
+          if (!GEP->collectOffset(DL, IndexBW, VarOffsets, Curr.Offset))
+            continue;
+
+          for (auto [V, Scale] : VarOffsets) {
+
+            // Commented out because I dont understand why we would need this
+            // But it was part of getStrideAndModOffsetOfGEP
+            // // Only keep a power of two factor for non-inbounds
+            // if (!GEP->isInBounds())
+            //   Scale =
+            //       APInt::getOneBitSet(Scale.getBitWidth(),
+            //                           Scale.countr_zero());
+
+            if (Curr.Stride == 0)
+              Curr.Stride = Scale;
+            else
+              Curr.Stride = APIntOps::GreatestCommonDivisor(Curr.Stride, Scale);
+          }
+        }
+
+        // Continue the DFS through every load/GEP user of this pointer.
+        for (User *U : Curr.Ptr->users()) {
+          if (isa<LoadInst, GetElementPtrInst>(U)) {
+            Curr.Ptr = U;
+            Stack.push_back(Curr);
+          }
+        }
+      }
+
+      // Iterate by reference: copying the SmallVector of loads per group
+      // would be wasteful.
+      for (auto &[AP, Loads] : LoadsByAccess) {
+        {
+          APInt SMin = APInt::getSignedMaxValue(AP.Ty->getIntegerBitWidth());
+          APInt SMax = APInt::getSignedMinValue(AP.Ty->getIntegerBitWidth());
+
+          uint64_t GVSize = DL.getTypeAllocSize(GV->getValueType());
+          uint64_t LoadSize = DL.getTypeStoreSize(AP.Ty);
+          // A load wider than the global can never be in-bounds; without this
+          // guard the subtraction below would wrap to a huge bound.
+          if (LoadSize > GVSize)
+            continue;
+
+          APInt LastValidOffset = APInt(IndexBW, GVSize - LoadSize);
+          for (APInt Offset = AP.Offset; Offset.ule(LastValidOffset);
+               Offset += AP.Stride) {
+            assert(Offset.isAligned(Loads[0]->getAlign()));
+            Constant *Cst = ConstantFoldLoadFromConstPtr(GV, AP.Ty, Offset, DL);
+
+            if (!Cst)
+              // Lambda captures of a struct binding is only available starting
+              // in C++20, so we skip to the next element with goto
+              goto NextGroup;
+
+            // MD_range is order agnostics
+            SMin = APIntOps::smin(SMin, Cst->getUniqueInteger());
+            SMax = APIntOps::smax(SMax, Cst->getUniqueInteger());
+          }
+
+          // If no in-bounds offset was foldable (e.g. the first aligned
+          // offset is already past the end of the global) SMin/SMax still
+          // hold their sentinel values and would produce a bogus range.
+          if (SMin.sgt(SMax))
+            continue;
+
+          MDBuilder MDHelper(M.getContext());
+
+          // The Range is allowed to wrap
+          MDNode *RNode = MDHelper.createRange(SMin, SMax + 1);
+          for (LoadInst *LI : Loads)
+            LI->setMetadata(LLVMContext::MD_range, RNode);
+          Changed = true;
+        }
+      NextGroup:
+        (void)0; // Labels expect statements
+      }
+    }();
+  }
+  return Changed;
+}
+
static Function *
FindAtExitLibFunc(Module &M,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2887,6 +3115,10 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL,
Changed |= LocalChange;
}
+ // Add range metadata to loads from constant global variables based on the
+ // values that could be loaded from the variable
+ Changed |= addRangeMetadata(M);
+
// TODO: Move all global ctors functions to the end of the module for code
// layout.
diff --git a/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll b/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll
new file mode 100644
index 0000000000000..b936e22a83edc
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll
@@ -0,0 +1,434 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p globalopt -S %s | FileCheck %s
+
+ at gvar0 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 -5, i64 1, i64 10, [253 x i64] zeroinitializer }> }, align 8
+ at gvar1 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 0, i64 1, i64 5, [253 x i64] zeroinitializer }> }, align 8
+ at gvar2 = global [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 1], align 16
+ at gvar3 = constant [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 8388608], align 16
+ at gvar5 = constant [2 x [6 x i8]] [[6 x i8] c"\01a_\02-0", [6 x i8] c" \0E\FF\07\08\09"], align 1
+
+%struct.A = type { i32, i8, ptr, i16, i8 }
+ at gvar6 = constant [2 x [2 x %struct.A]] [[2 x %struct.A] [%struct.A { i32 8, i8 97, ptr null, i16 9, i8 12 }, %struct.A { i32 -1, i8 107, ptr null, i16 7, i8 0 }], [2 x %struct.A] [%struct.A { i32 16, i8 46, ptr null, i16 59, i8 95 }, %struct.A { i32 0, i8 0, ptr null, i16 49, i8 100 }]], align 16
+%struct.B = type <{ i32, i8, ptr, i16, i8 }>
+ at gvar7 = constant [2 x [2 x %struct.B]] [[2 x %struct.B] [%struct.B <{ i32 8, i8 97, ptr null, i16 9, i8 12 }>, %struct.B <{ i32 -1, i8 107, ptr null, i16 7, i8 0 }>], [2 x %struct.B] [%struct.B <{ i32 16, i8 46, ptr null, i16 59, i8 95 }>, %struct.B <{ i32 0, i8 0, ptr null, i16 49, i8 100 }>]], align 32
+%struct.C = type { i32, i32, i32 }
+ at gvar8 = constant [34 x %struct.C] [%struct.C { i32 0, i32 1, i32 2 }, %struct.C { i32 3, i32 4, i32 5 }, %struct.C { i32 6, i32 7, i32 8 }, %struct.C { i32 9, i32 10, i32 11 }, %struct.C { i32 12, i32 13, i32 14 }, %struct.C { i32 15, i32 16, i32 17 }, %struct.C { i32 18, i32 19, i32 20 }, %struct.C { i32 21, i32 22, i32 23 }, %struct.C { i32 24, i32 25, i32 26 }, %struct.C { i32 27, i32 28, i32 29 }, %struct.C { i32 30, i32 31, i32 32 }, %struct.C { i32 33, i32 34, i32 35 }, %struct.C { i32 36, i32 37, i32 38 }, %struct.C { i32 39, i32 40, i32 41 }, %struct.C { i32 42, i32 43, i32 44 }, %struct.C { i32 45, i32 46, i32 47 }, %struct.C { i32 48, i32 49, i32 50 }, %struct.C { i32 51, i32 52, i32 53 }, %struct.C { i32 54, i32 55, i32 56 }, %struct.C { i32 57, i32 58, i32 59 }, %struct.C { i32 60, i32 61, i32 62 }, %struct.C { i32 63, i32 64, i32 65 }, %struct.C { i32 66, i32 67, i32 68 }, %struct.C { i32 69, i32 70, i32 71 }, %struct.C { i32 72, i32 73, i32 74 }, %struct.C { i32 75, i32 76, i32 77 }, %struct.C { i32 78, i32 79, i32 80 }, %struct.C { i32 81, i32 82, i32 83 }, %struct.C { i32 84, i32 85, i32 86 }, %struct.C { i32 87, i32 88, i32 89 }, %struct.C { i32 90, i32 91, i32 92 }, %struct.C { i32 93, i32 94, i32 95 }, %struct.C { i32 96, i32 97, i32 98 }, %struct.C { i32 99, i32 100, i32 101 }], align 16, align 256
+ at gvar9 = constant [6 x [18 x i8]] [[18 x i8] c"\00\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10\11", [18 x i8] c"\12\13\14\15\16\17\18\19\1A\1B\1C\1D\1E\1F !\22#", [18 x i8] c"$%&'()*+,-./012345", [18 x i8] c"6789:;<=>?@ABCDEFG", [18 x i8] c"HIJKLMNOPQRSTUVWXY", [18 x i8] c"Z[\\]^_`abcdefghijk"], align 16
+
+define i64 @test_basic0(i64 %3) {
+; CHECK-LABEL: define i64 @test_basic0(
+; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0:![0-9]+]]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %ptr = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 %3
+ %5 = load i64, ptr %ptr, align 8
+ ret i64 %5
+}
+
+define i64 @test_basic1(i64 %3) {
+; CHECK-LABEL: define i64 @test_basic1(
+; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0]]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %ptr = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 %3
+ %5 = load i64, ptr %ptr, align 8
+ ret i64 %5
+}
+
+define i32 @test_different_type(i64 %3) {
+; CHECK-LABEL: define i32 @test_different_type(
+; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8, !range [[RNG1:![0-9]+]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %ptr = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 %3
+ %5 = load i32, ptr %ptr, align 8
+ ret i32 %5
+}
+
+define i32 @test_non_constant(i64 %3) {
+; CHECK-LABEL: define i32 @test_non_constant(
+; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [8 x i32], ptr @gvar2, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %ptr = getelementptr inbounds [8 x i32], ptr @gvar2, i64 0, i64 %3
+ %5 = load i32, ptr %ptr, align 8
+ ret i32 %5
+}
+
+define i64 @test_other(i8 %first_idx) {
+; CHECK-LABEL: define i64 @test_other(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar3, i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8, !range [[RNG2:![0-9]+]]
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %idxprom = zext i8 %first_idx to i64
+ %arrayidx = getelementptr inbounds i64, ptr @gvar3, i64 %idxprom
+ %0 = load i64, ptr %arrayidx, align 8
+ ret i64 %0
+}
+
+; This could be supported but is rare and more complex, so for now we don't process it.
+define i64 @test_multiple_types0(i8 %first_idx) {
+; CHECK-LABEL: define i64 @test_multiple_types0(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar3, i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8, !range [[RNG2]]
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %idxprom = zext i8 %first_idx to i64
+ %arrayidx = getelementptr inbounds i64, ptr @gvar3, i64 %idxprom
+ %0 = load i64, ptr %arrayidx, align 8
+ ret i64 %0
+}
+
+define i32 @test_multiple_types1(i8 %first_idx) {
+; CHECK-LABEL: define i32 @test_multiple_types1(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr @gvar3, i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 8, !range [[RNG3:![0-9]+]]
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %idxprom = zext i8 %first_idx to i64
+ %arrayidx = getelementptr inbounds i32, ptr @gvar3, i64 %idxprom
+ %0 = load i32, ptr %arrayidx, align 8
+ ret i32 %0
+}
+
+define i32 @test_overaligned_load(i8 %first_idx) {
+; CHECK-LABEL: define i32 @test_overaligned_load(
+; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr @gvar3, i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 32
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %idxprom = zext i8 %first_idx to i64
+ %arrayidx = getelementptr inbounds i32, ptr @gvar3, i64 %idxprom
+ %0 = load i32, ptr %arrayidx, align 32
+ ret i32 %0
+}
+
+; This could also be supported, but for now it is not.
+define dso_local signext i8 @multi_dimentional0(i8 zeroext %0, i8 zeroext %1) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local signext i8 @multi_dimentional0(
+; CHECK-SAME: i8 zeroext [[TMP0:%.*]], i8 zeroext [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 [[TMP3]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1, !range [[RNG4:![0-9]+]]
+; CHECK-NEXT: ret i8 [[TMP6]]
+;
+ %3 = zext i8 %0 to i64
+ %4 = zext i8 %1 to i64
+ %5 = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 %3, i64 %4
+ %6 = load i8, ptr %5, align 1
+ ret i8 %6
+}
+
+define i64 @test_complex0(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_complex0(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 [[TMP3]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8, !range [[RNG5:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
+; CHECK-NEXT: ret i64 [[TMP7]]
+;
+ %3 = zext i8 %0 to i64
+ %4 = zext i8 %1 to i64
+ %5 = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 %3, i64 %4
+ %6 = load i32, ptr %5, align 8
+ %7 = sext i32 %6 to i64
+ ret i64 %7
+}
+
+define i64 @test_multi_gep(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_multi_gep(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 [[TMP3]]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x %struct.A], ptr [[GEP0]], i64 0, i64 [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP1]], align 16, !range [[RNG6:![0-9]+]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x %struct.A], ptr [[GEP0]], i64 0, i64 [[TMP4]]
+; CHECK-NEXT: [[B6:%.*]] = load i32, ptr [[GEP2]], align 4, !range [[RNG5]]
+; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT: ret i64 [[TMP6]]
+;
+ %3 = zext i8 %0 to i64
+ %4 = zext i8 %1 to i64
+ %gep0 = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 %3
+ %gep1 = getelementptr inbounds [2 x %struct.A], ptr %gep0, i64 0, i64 %4
+ %6 = load i32, ptr %gep1, align 16
+ %gep2 = getelementptr inbounds [2 x %struct.A], ptr %gep0, i64 0, i64 %4
+ %b6 = load i32, ptr %gep2, align 4
+ %7 = sext i32 %6 to i64
+ ret i64 %7
+}
+
+define i64 @test_complex1(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_complex1(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 [[TMP3]], i64 [[TMP4]], i32 3
+; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8, !range [[RNG7:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = zext i16 [[TMP6]] to i64
+; CHECK-NEXT: ret i64 [[TMP7]]
+;
+ %4 = zext i8 %0 to i64
+ %5 = zext i8 %1 to i64
+ %6 = getelementptr inbounds [2 x [2 x %struct.A]], ptr @gvar6, i64 0, i64 %4, i64 %5, i32 3
+ %7 = load i16, ptr %6, align 8
+ %8 = zext i16 %7 to i64
+ ret i64 %8
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind sspstrong willreturn memory(none) uwtable
+define i64 @test_packed_struct0(i8 zeroext %0, i8 zeroext %1) {
+; CHECK-LABEL: define i64 @test_packed_struct0(
+; CHECK-SAME: i8 zeroext [[TMP0:%.*]], i8 zeroext [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 [[TMP3]], i64 [[TMP4]], i32 3
+; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 1, !range [[RNG7]]
+; CHECK-NEXT: [[TMP7:%.*]] = zext i16 [[TMP6]] to i64
+; CHECK-NEXT: ret i64 [[TMP7]]
+;
+ %3 = zext i8 %0 to i64
+ %4 = zext i8 %1 to i64
+ %5 = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 %3, i64 %4, i32 3
+ %6 = load i16, ptr %5, align 1
+ %7 = zext i16 %6 to i64
+ ret i64 %7
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind sspstrong willreturn memory(none) uwtable
+define i64 @test_packed_struct_aligned(i8 zeroext %0, i8 zeroext %1) {
+; CHECK-LABEL: define i64 @test_packed_struct_aligned(
+; CHECK-SAME: i8 zeroext [[TMP0:%.*]], i8 zeroext [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 [[TMP3]], i64 [[TMP4]], i32 3
+; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = zext i16 poison to i64
+; CHECK-NEXT: ret i64 [[TMP7]]
+;
+ %3 = zext i8 %0 to i64
+ %4 = zext i8 %1 to i64
+ %5 = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 %3, i64 %4, i32 3
+ %6 = load i16, ptr %5, align 8
+ %7 = zext i16 %6 to i64
+ ret i64 %7
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind sspstrong willreturn memory(none) uwtable
+define i64 @test_packed_struct_aligned2(i8 zeroext %0, i8 zeroext %1) {
+; CHECK-LABEL: define i64 @test_packed_struct_aligned2(
+; CHECK-SAME: i8 zeroext [[TMP0:%.*]], i8 zeroext [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 [[TMP3]], i64 [[TMP4]], i32 3
+; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = zext i16 poison to i64
+; CHECK-NEXT: ret i64 [[TMP7]]
+;
+ %3 = zext i8 %0 to i64
+ %4 = zext i8 %1 to i64
+ %5 = getelementptr inbounds [2 x [2 x %struct.B]], ptr @gvar7, i64 0, i64 %3, i64 %4, i32 3
+ %6 = load i16, ptr %5, align 16
+ %7 = zext i16 %6 to i64
+ ret i64 %7
+}
+
+define i64 @test_alignment_stride0(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_alignment_stride0(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr @gvar8, i64 32
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [100 x %struct.C], ptr [[GEP0]], i64 0, i64 [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !range [[RNG8:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT: ret i64 [[TMP6]]
+;
+ %3 = zext i8 %0 to i64
+ %gep0 = getelementptr inbounds i32, ptr @gvar8, i64 32
+ %4 = getelementptr inbounds [100 x %struct.C], ptr %gep0, i64 0, i64 %3, i32 0
+ %5 = load i32, ptr %4, align 4
+ %6 = sext i32 %5 to i64
+ ret i64 %6
+}
+
+define i64 @test_alignment_stride1(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_alignment_stride1(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr @gvar8, i64 32
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [100 x %struct.C], ptr [[GEP0]], i64 0, i64 [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8, !range [[RNG9:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT: ret i64 [[TMP6]]
+;
+ %3 = zext i8 %0 to i64
+ %gep0 = getelementptr inbounds i32, ptr @gvar8, i64 32
+ %4 = getelementptr inbounds [100 x %struct.C], ptr %gep0, i64 0, i64 %3, i32 0
+ %5 = load i32, ptr %4, align 8
+ %6 = sext i32 %5 to i64
+ ret i64 %6
+}
+
+define i64 @test_alignment_stride2(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_alignment_stride2(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr @gvar8, i64 32
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [100 x %struct.C], ptr [[GEP0]], i64 0, i64 [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 16, !range [[RNG10:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT: ret i64 [[TMP6]]
+;
+ %3 = zext i8 %0 to i64
+ %gep0 = getelementptr inbounds i32, ptr @gvar8, i64 32
+ %4 = getelementptr inbounds [100 x %struct.C], ptr %gep0, i64 0, i64 %3, i32 0
+ %5 = load i32, ptr %4, align 16
+ %6 = sext i32 %5 to i64
+ ret i64 %6
+}
+
+define i64 @test_alignment_stride3(i8 %0, i8 %1) {
+; CHECK-LABEL: define i64 @test_alignment_stride3(
+; CHECK-SAME: i8 [[TMP0:%.*]], i8 [[TMP1:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr @gvar8, i64 32
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [100 x %struct.C], ptr [[GEP0]], i64 0, i64 [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 32, !range [[RNG11:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT: ret i64 [[TMP6]]
+;
+ %3 = zext i8 %0 to i64
+ %gep0 = getelementptr inbounds i32, ptr @gvar8, i64 32
+ %4 = getelementptr inbounds [100 x %struct.C], ptr %gep0, i64 0, i64 %3, i32 0
+ %5 = load i32, ptr %4, align 32
+ %6 = sext i32 %5 to i64
+ ret i64 %6
+}
+
+define i64 @test_strides(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [6 x [18 x i8]], ptr @gvar9, i64 0, i64 [[TMP2]], i64 14
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8, !range [[RNG12:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = sext i8 [[TMP4]] to i64
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %3 = zext i8 %0 to i64
+ %4 = getelementptr inbounds [6 x [18 x i8]], ptr @gvar9, i64 0, i64 %3, i64 14
+ %5 = load i8, ptr %4, align 8
+ %6 = sext i8 %5 to i64
+ ret i64 %6
+}
+
+define i64 @test_strides_poison(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides_poison(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [6 x [18 x i8]], ptr @gvar9, i64 0, i64 [[TMP2]], i64 7
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = sext i8 poison to i64
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %3 = zext i8 %0 to i64
+ %4 = getelementptr inbounds [6 x [18 x i8]], ptr @gvar9, i64 0, i64 %3, i64 7
+ %5 = load i8, ptr %4, align 8
+ %6 = sext i8 %5 to i64
+ ret i64 %6
+}
+
+define i64 @test_strides2(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides2(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [5 x [19 x i8]], ptr @gvar9, i64 0, i64 [[TMP2]], i64 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8, !range [[RNG13:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = sext i8 [[TMP4]] to i64
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %3 = zext i8 %0 to i64
+ %4 = getelementptr inbounds [5 x [19 x i8]], ptr @gvar9, i64 0, i64 %3, i64 8
+ %5 = load i8, ptr %4, align 8
+ %6 = sext i8 %5 to i64
+ ret i64 %6
+}
+
+define i64 @test_strides3(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides3(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [12 x [9 x i8]], ptr @gvar9, i64 0, i64 2, i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4, !range [[RNG14:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = sext i8 [[TMP4]] to i64
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %3 = zext i8 %0 to i64
+ %4 = getelementptr inbounds [12 x [9 x i8]], ptr @gvar9, i64 0, i64 2, i64 %3
+ %5 = load i8, ptr %4, align 4
+ %6 = sext i8 %5 to i64
+ ret i64 %6
+}
+
+define i64 @test_strides4(i8 %0) {
+; CHECK-LABEL: define i64 @test_strides4(
+; CHECK-SAME: i8 [[TMP0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x [27 x i8]], ptr @gvar9, i64 0, i64 [[TMP2]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 2, !range [[RNG15:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i64
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %3 = zext i8 %0 to i64
+ %4 = getelementptr inbounds [4 x [27 x i8]], ptr @gvar9, i64 0, i64 %3, i64 2
+ %5 = load i16, ptr %4, align 2
+ %6 = sext i16 %5 to i64
+ ret i64 %6
+}
+
+;.
+; CHECK: [[RNG0]] = !{i64 -5, i64 11}
+; CHECK: [[RNG1]] = !{i32 0, i32 6}
+; CHECK: [[RNG2]] = !{i64 2, i64 36028801313924476}
+; CHECK: [[RNG3]] = !{i32 -6789, i32 3}
+; CHECK: [[RNG4]] = !{i8 -1, i8 98}
+; CHECK: [[RNG5]] = !{i32 -1, i32 17}
+; CHECK: [[RNG6]] = !{i32 8, i32 17}
+; CHECK: [[RNG7]] = !{i16 7, i16 60}
+; CHECK: [[RNG8]] = !{i32 32, i32 102}
+; CHECK: [[RNG9]] = !{i32 32, i32 99}
+; CHECK: [[RNG10]] = !{i32 32, i32 93}
+; CHECK: [[RNG11]] = !{i32 32, i32 81}
+; CHECK: [[RNG12]] = !{i8 32, i8 105}
+; CHECK: [[RNG13]] = !{i8 8, i8 9}
+; CHECK: [[RNG14]] = !{i8 20, i8 105}
+; CHECK: [[RNG15]] = !{i16 770, i16 14649}
+;.
More information about the llvm-commits
mailing list