[llvm] [SeparateConstOffsetFromGEP] Remove support for arithmetic lowering (PR #151477)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 02:24:43 PDT 2025
https://github.com/nikic created https://github.com/llvm/llvm-project/pull/151477
I don't think there is any benefit to lowering to ptrtoint + arithmetic + inttoptr over the newer ptradd lowering. Even if a target does not use codegen AA, it probably still has IR passes that benefit from the correct representation.
As far as I can tell, no targets actually use this configuration anymore (they either don't use the LowerGEP option, or they use AA and thus already get the ptradd lowering).
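For reference, a rough sketch of the two lowerings of a multi-index GEP, using the same illustrative placeholders (length_of_10xstruct, length_of_struct, struct_field_3) as the pass's header comment rather than real IR constants:

  ; Original GEP:
  ;   %p = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 3

  ; Old arithmetic lowering (removed by this patch):
  %1 = ptrtoint ptr %ptr to i64
  %2 = mul i64 %i, length_of_10xstruct
  %3 = add i64 %1, %2
  %4 = mul i64 %j1, length_of_struct
  %5 = add i64 %3, %4
  %6 = add i64 %5, struct_field_3                       ; constant offset
  %p = inttoptr i64 %6 to ptr

  ; ptradd lowering (the only LowerGEP form after this patch):
  %2 = mul i64 %i, length_of_10xstruct
  %3 = getelementptr i8, ptr %ptr, i64 %2
  %4 = mul i64 %j1, length_of_struct
  %5 = getelementptr i8, ptr %3, i64 %4
  %p = getelementptr i8, ptr %5, i64 struct_field_3     ; constant offset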
>From c4a4dd0f06a2e8bc681348a28b4f36342347abdd Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Thu, 31 Jul 2025 11:17:45 +0200
Subject: [PATCH] [SeparateConstOffsetFromGEP] Remove support for arithmetic
lowering
I don't think there is any benefit to lowering to ptrtoint +
arithmetic + inttoptr over the newer ptradd lowering. Even if a
target does not use codegen AA, it probably still has IR passes
that benefit from correct representation.
As far as I can tell, no targets actually use this configuration
(they either don't use the LowerGEP option, or they use AA).
---
.../Scalar/SeparateConstOffsetFromGEP.cpp | 133 +++---------------
llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll | 32 ++---
.../split-gep-or-as-add.ll | 6 +-
.../split-gep-sub.ll | 6 +-
4 files changed, 31 insertions(+), 146 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 320b79203c0b3..6ffe8413a5079 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -79,8 +79,7 @@
// ld.global.f32 %f4, [%rl6+132]; // much better
//
// Another improvement enabled by the LowerGEP flag is to lower a GEP with
-// multiple indices to either multiple GEPs with a single index or arithmetic
-// operations (depending on whether the target uses alias analysis in codegen).
+// multiple indices to multiple GEPs with a single index.
// Such transformation can have following benefits:
// (1) It can always extract constants in the indices of structure type.
// (2) After such Lowering, there are more optimization opportunities such as
@@ -88,59 +87,33 @@
//
// E.g. The following GEPs have multiple indices:
// BB1:
-// %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
+// %p = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 3
// load %p
// ...
// BB2:
-// %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 2
+// %p2 = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 2
// load %p2
// ...
//
// We can not do CSE to the common part related to index "i64 %i". Lowering
// GEPs can achieve such goals.
-// If the target does not use alias analysis in codegen, this pass will
-// lower a GEP with multiple indices into arithmetic operations:
-// BB1:
-// %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
-// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
-// %3 = add i64 %1, %2 ; CSE opportunity
-// %4 = mul i64 %j1, length_of_struct
-// %5 = add i64 %3, %4
-// %6 = add i64 %3, struct_field_3 ; Constant offset
-// %p = inttoptr i64 %6 to i32*
-// load %p
-// ...
-// BB2:
-// %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
-// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
-// %9 = add i64 %7, %8 ; CSE opportunity
-// %10 = mul i64 %j2, length_of_struct
-// %11 = add i64 %9, %10
-// %12 = add i64 %11, struct_field_2 ; Constant offset
-// %p = inttoptr i64 %12 to i32*
-// load %p2
-// ...
//
-// If the target uses alias analysis in codegen, this pass will lower a GEP
-// with multiple indices into multiple GEPs with a single index:
+// This pass will lower a GEP with multiple indices into multiple GEPs with a
+// single index:
// BB1:
-// %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
-// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
-// %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity
+// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
+// %3 = getelementptr i8, ptr %ptr, i64 %2 ; CSE opportunity
// %4 = mul i64 %j1, length_of_struct
-// %5 = getelementptr i8* %3, i64 %4
-// %6 = getelementptr i8* %5, struct_field_3 ; Constant offset
-// %p = bitcast i8* %6 to i32*
+// %5 = getelementptr i8, ptr %3, i64 %4
+// %p = getelementptr i8, ptr %5, struct_field_3 ; Constant offset
// load %p
// ...
// BB2:
-// %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
-// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
-// %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity
+// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
+// %9 = getelementptr i8, ptr %ptr, i64 %8 ; CSE opportunity
// %10 = mul i64 %j2, length_of_struct
-// %11 = getelementptr i8* %9, i64 %10
-// %12 = getelementptr i8* %11, struct_field_2 ; Constant offset
-// %p2 = bitcast i8* %12 to i32*
+// %11 = getelementptr i8, ptr %9, i64 %10
+// %p2 = getelementptr i8, ptr %11, struct_field_2 ; Constant offset
// load %p2
// ...
//
@@ -408,16 +381,6 @@ class SeparateConstOffsetFromGEP {
void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
int64_t AccumulativeByteOffset);
- /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
- /// Function splitGEP already split the original GEP into a variadic part and
- /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
- /// variadic part into a set of arithmetic operations and applies
- /// AccumulativeByteOffset to it.
- /// \p Variadic The variadic part of the original GEP.
- /// \p AccumulativeByteOffset The constant offset.
- void lowerToArithmetics(GetElementPtrInst *Variadic,
- int64_t AccumulativeByteOffset);
-
/// Finds the constant offset within each index and accumulates them. If
/// LowerGEP is true, it finds in indices of both sequential and structure
/// types, otherwise it only finds in sequential indices. The output
@@ -951,55 +914,6 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
Variadic->eraseFromParent();
}
-void
-SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
- int64_t AccumulativeByteOffset) {
- IRBuilder<> Builder(Variadic);
- Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
- assert(IntPtrTy == DL->getIndexType(Variadic->getType()) &&
- "Pointer type must match index type for arithmetic-based lowering of "
- "split GEPs");
-
- Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
- gep_type_iterator GTI = gep_type_begin(*Variadic);
- // Create ADD/SHL/MUL arithmetic operations for each sequential indices. We
- // don't create arithmetics for structure indices, as they are accumulated
- // in the constant offset index.
- for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
- if (GTI.isSequential()) {
- Value *Idx = Variadic->getOperand(I);
- // Skip zero indices.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
- if (CI->isZero())
- continue;
-
- APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
- GTI.getSequentialElementStride(*DL));
- // Scale the index by element size.
- if (ElementSize != 1) {
- if (ElementSize.isPowerOf2()) {
- Idx = Builder.CreateShl(
- Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
- } else {
- Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
- }
- }
- // Create an ADD for each index.
- ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
- }
- }
-
- // Create an ADD for the constant offset index.
- if (AccumulativeByteOffset != 0) {
- ResultPtr = Builder.CreateAdd(
- ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
- }
-
- ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
- Variadic->replaceAllUsesWith(ResultPtr);
- Variadic->eraseFromParent();
-}
-
bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
TargetTransformInfo &TTI) {
auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
@@ -1091,8 +1005,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// Notice that we don't remove struct field indices here. If LowerGEP is
// disabled, a structure index is not accumulated and we still use the old
// one. If LowerGEP is enabled, a structure index is accumulated in the
- // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
- // handle the constant offset and won't need a new structure index.
+ // constant offset. LowerToSingleIndexGEPs will later handle the constant
+ // offset and won't need a new structure index.
gep_type_iterator GTI = gep_type_begin(*GEP);
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
if (GTI.isSequential()) {
@@ -1167,22 +1081,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
GEP->setNoWrapFlags(NewGEPFlags);
- // Lowers a GEP to either GEPs with a single index or arithmetic operations.
+ // Lowers a GEP to GEPs with a single index.
if (LowerGEP) {
- // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
- // arithmetic operations if the target uses alias analysis in codegen.
- // Additionally, pointers that aren't integral (and so can't be safely
- // converted to integers) or those whose offset size is different from their
- // pointer size (which means that doing integer arithmetic on them could
- // affect that data) can't be lowered in this way.
- unsigned AddrSpace = GEP->getPointerAddressSpace();
- bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) !=
- DL->getIndexSizeInBits(AddrSpace);
- if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) ||
- PointerHasExtraData)
- lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
- else
- lowerToArithmetics(GEP, AccumulativeByteOffset);
+ lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
index 578038b8f4daa..d9cdac4946360 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -1,8 +1,8 @@
; RUN: llc -O3 -aarch64-enable-gep-opt=true -verify-machineinstrs %s -o - | FileCheck %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -aarch64-use-aa=false -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-NoAA %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -aarch64-use-aa=false -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"
@@ -38,24 +38,12 @@ if.end: ; preds = %if.then, %entry
; CHECK-NOT: madd
; CHECK:ldr
-; CHECK-NoAA-LABEL: @test_GEP_CSE(
-; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint ptr %string to i64
-; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
-; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]]
-; CHECK-NoAA: add i64 [[PTR2]], 23052
-; CHECK-NoAA: inttoptr
-; CHECK-NoAA: if.then:
-; CHECK-NoAA-NOT: ptrtoint
-; CHECK-NoAA-NOT: mul
-; CHECK-NoAA: add i64 [[PTR2]], 23048
-; CHECK-NoAA: inttoptr
-
-; CHECK-UseAA-LABEL: @test_GEP_CSE(
-; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
-; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]]
-; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23052
-; CHECK-UseAA: if.then:
-; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23048
+; CHECK-IR-LABEL: @test_GEP_CSE(
+; CHECK-IR: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
+; CHECK-IR: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]]
+; CHECK-IR: getelementptr i8, ptr [[PTR1]], i64 23052
+; CHECK-IR: if.then:
+; CHECK-IR: getelementptr i8, ptr [[PTR1]], i64 23048
%class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]}
%struct.pt = type { ptr, i32, i32 }
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll
index b3096820425b8..2fad306c5bf3c 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll
@@ -47,10 +47,8 @@ define void @testDisjointOrSplits(ptr %p) {
; CHECK-LABEL: define void @testDisjointOrSplits(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[VAR:%.*]] = tail call i64 @foo()
-; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR]]
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 10
-; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[VAR]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 10
; CHECK-NEXT: store i8 0, ptr [[TMP4]], align 1
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll
index b0e88ef835f6a..a6b38bc9924a7 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll
@@ -31,11 +31,9 @@ define void @test_A_sub_B_add_ConstantInt(ptr %p) {
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[REM]] to i64
; CHECK-NEXT: [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[SUB22]], 2
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 2044
-; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 2044
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP4]]
; CHECK-NEXT: store float 1.000000e+00, ptr [[TMP7]], align 4
; CHECK-NEXT: br label [[COND_END]]
; CHECK: cond.end: