[clang] cece0a1 - Revert "[ConstantFold] Drop gep of gep fold entirely (#95126)"

Nikita Popov via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 12 02:52:27 PDT 2024


Author: Nikita Popov
Date: 2024-06-12T11:52:12+02:00
New Revision: cece0a105b29dcbb9d88d0aa264c4745c07a8456

URL: https://github.com/llvm/llvm-project/commit/cece0a105b29dcbb9d88d0aa264c4745c07a8456
DIFF: https://github.com/llvm/llvm-project/commit/cece0a105b29dcbb9d88d0aa264c4745c07a8456.diff

LOG: Revert "[ConstantFold] Drop gep of gep fold entirely (#95126)"

This reverts commit 3b3b839c66dc49674fd6646650525a2173030690.

This broke the flang+openmp+offload buildbot, as reported in
https://github.com/llvm/llvm-project/pull/95126#issuecomment-2162424019.

Added: 
    

Modified: 
    clang/test/CodeGenCUDA/managed-var.cu
    clang/test/CodeGenCXX/2011-12-19-init-list-ctor.cpp
    clang/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist-pr12086.cpp
    clang/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp
    clang/test/CodeGenCXX/ms-inline-asm-fields.cpp
    clang/test/OpenMP/threadprivate_codegen.cpp
    llvm/include/llvm/IR/ConstantFold.h
    llvm/lib/Analysis/InstructionSimplify.cpp
    llvm/lib/IR/ConstantFold.cpp
    llvm/lib/IR/Constants.cpp

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu
index 07e1a1e692c75..5206acc62fe00 100644
--- a/clang/test/CodeGenCUDA/managed-var.cu
+++ b/clang/test/CodeGenCUDA/managed-var.cu
@@ -127,10 +127,9 @@ __device__ __host__ float load2() {
 
 // HOST-LABEL: define {{.*}}@_Z5load3v()
 // HOST:  %ld.managed = load ptr, ptr @v2, align 16
-// HOST:  %0 = getelementptr inbounds [100 x %struct.vec], ptr %ld.managed, i64 0, i64 1
-// HOST:  %1 = getelementptr inbounds %struct.vec, ptr %0, i32 0, i32 1
-// HOST:  %2 = load float, ptr %1, align 4
-// HOST:  ret float %2
+// HOST:  %0 = getelementptr inbounds [100 x %struct.vec], ptr %ld.managed, i64 0, i64 1, i32 1
+// HOST:  %1 = load float, ptr %0, align 4
+// HOST:  ret float %1
 float load3() {
   return v2[1].y;
 }
@@ -140,11 +139,10 @@ float load3() {
 // HOST:  %0 = getelementptr inbounds [100 x %struct.vec], ptr %ld.managed, i64 0, i64 1
 // HOST:  %1 = ptrtoint ptr %0 to i64
 // HOST:  %ld.managed1 = load ptr, ptr @v2, align 16
-// HOST:  %2 = getelementptr inbounds [100 x %struct.vec], ptr %ld.managed1, i64 0, i64 1
-// HOST:  %3 = getelementptr inbounds %struct.vec, ptr %2, i32 0, i32 1
-// HOST:  %4 = ptrtoint ptr %3 to i64
-// HOST:  %5 = sub i64 %4, %1
-// HOST:  %sub.ptr.div = sdiv exact i64 %5, 4
+// HOST:  %2 = getelementptr inbounds [100 x %struct.vec], ptr %ld.managed1, i64 0, i64 1, i32 1
+// HOST:  %3 = ptrtoint ptr %2 to i64
+// HOST:  %4 = sub i64 %3, %1
+// HOST:  %sub.ptr.div = sdiv exact i64 %4, 4
 // HOST:  %conv = sitofp i64 %sub.ptr.div to float
 // HOST:  ret float %conv
 float addr_taken2() {

diff  --git a/clang/test/CodeGenCXX/2011-12-19-init-list-ctor.cpp b/clang/test/CodeGenCXX/2011-12-19-init-list-ctor.cpp
index 14557829268ef..4033adc8f0390 100644
--- a/clang/test/CodeGenCXX/2011-12-19-init-list-ctor.cpp
+++ b/clang/test/CodeGenCXX/2011-12-19-init-list-ctor.cpp
@@ -21,6 +21,6 @@ struct S {
 // CHECK: store i32 0, ptr @arr
 // CHECK: call void @_ZN1AC1EPKc(ptr {{[^,]*}} getelementptr inbounds (%struct.S, ptr @arr, i32 0, i32 1), ptr noundef @.str)
 // CHECK: store i32 1, ptr getelementptr inbounds (%struct.S, ptr @arr, i64 1)
-// CHECK: call void @_ZN1AC1EPKc(ptr {{[^,]*}} getelementptr inbounds (%struct.S, ptr getelementptr inbounds (%struct.S, ptr @arr, i64 1), i32 0, i32 1), ptr noundef @.str.1)
+// CHECK: call void @_ZN1AC1EPKc(ptr {{[^,]*}} getelementptr inbounds (%struct.S, ptr @arr, i64 1, i32 1), ptr noundef @.str.1)
 // CHECK: store i32 2, ptr getelementptr inbounds (%struct.S, ptr @arr, i64 2)
-// CHECK: call void @_ZN1AC1EPKc(ptr {{[^,]*}} getelementptr inbounds (%struct.S, ptr getelementptr inbounds (%struct.S, ptr @arr, i64 2), i32 0, i32 1), ptr noundef @.str.2)
+// CHECK: call void @_ZN1AC1EPKc(ptr {{[^,]*}} getelementptr inbounds (%struct.S, ptr @arr, i64 2, i32 1), ptr noundef @.str.2)

diff  --git a/clang/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist-pr12086.cpp b/clang/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist-pr12086.cpp
index caa92f47a93c2..6fbe4c7fd17a7 100644
--- a/clang/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist-pr12086.cpp
+++ b/clang/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist-pr12086.cpp
@@ -79,12 +79,12 @@ std::initializer_list<std::initializer_list<int>> nested = {
 // CHECK-DYNAMIC-BL: store i32 {{.*}}, ptr getelementptr inbounds (i32, ptr @_ZGR6nested1_, i64 1)
 // CHECK-DYNAMIC-BL: store ptr @_ZGR6nested1_,
 // CHECK-DYNAMIC-BL:       ptr getelementptr inbounds ({{.*}}, ptr @_ZGR6nested_, i64 1), align 8
-// CHECK-DYNAMIC-BL: store i64 2, ptr getelementptr inbounds ({{.*}}, ptr getelementptr inbounds ({{.*}}, ptr @_ZGR6nested_, i64 1), i32 0, i32 1), align 8
+// CHECK-DYNAMIC-BL: store i64 2, ptr getelementptr inbounds ({{.*}}, ptr @_ZGR6nested_, i64 1, i32 1), align 8
 // CHECK-DYNAMIC-BL: store i32 5, ptr @_ZGR6nested2_
 // CHECK-DYNAMIC-BL: store i32 {{.*}}, ptr getelementptr inbounds (i32, ptr @_ZGR6nested2_, i64 1)
 // CHECK-DYNAMIC-BL: store ptr @_ZGR6nested2_,
 // CHECK-DYNAMIC-BL:       ptr getelementptr inbounds ({{.*}}, ptr @_ZGR6nested_, i64 2), align 8
-// CHECK-DYNAMIC-BL: store i64 2, ptr getelementptr inbounds ({{.*}}, ptr getelementptr inbounds ({{.*}}, ptr @_ZGR6nested_, i64 2), i32 0, i32 1), align 8
+// CHECK-DYNAMIC-BL: store i64 2, ptr getelementptr inbounds ({{.*}}, ptr @_ZGR6nested_, i64 2, i32 1), align 8
 // CHECK-DYNAMIC-BL: store ptr @_ZGR6nested_,
 // CHECK-DYNAMIC-BL:       ptr @nested, align 8
 // CHECK-DYNAMIC-BL: store i64 3, ptr getelementptr inbounds ({{.*}}, ptr @nested, i32 0, i32 1), align 8
@@ -119,13 +119,13 @@ std::initializer_list<std::initializer_list<int>> nested = {
 // CHECK-DYNAMIC-BE: store ptr @_ZGR6nested1_,
 // CHECK-DYNAMIC-BE:       ptr getelementptr inbounds ({{.*}}, ptr @_ZGR6nested_, i64 1), align 8
 // CHECK-DYNAMIC-BE: store ptr getelementptr inbounds ([2 x i32], ptr @_ZGR6nested1_, i64 0, i64 2),
-// CHECK-DYNAMIC-BE:       ptr getelementptr inbounds ({{.*}}, ptr getelementptr inbounds ({{.*}}, ptr @_ZGR6nested_, i64 1), i32 0, i32 1), align 8
+// CHECK-DYNAMIC-BE:       ptr getelementptr inbounds ({{.*}}, ptr @_ZGR6nested_, i64 1, i32 1), align 8
 // CHECK-DYNAMIC-BE: store i32 5, ptr @_ZGR6nested2_
 // CHECK-DYNAMIC-BE: store i32 {{.*}}, ptr getelementptr inbounds (i32, ptr @_ZGR6nested2_, i64 1)
 // CHECK-DYNAMIC-BE: store ptr @_ZGR6nested2_,
 // CHECK-DYNAMIC-BE:       ptr getelementptr inbounds ({{.*}}, ptr @_ZGR6nested_, i64 2), align 8
 // CHECK-DYNAMIC-BE: store ptr getelementptr inbounds ([2 x i32], ptr @_ZGR6nested2_, i64 0, i64 2),
-// CHECK-DYNAMIC-BE:       ptr getelementptr inbounds ({{.*}}, ptr getelementptr inbounds ({{.*}}, ptr @_ZGR6nested_, i64 2), i32 0, i32 1), align 8
+// CHECK-DYNAMIC-BE:       ptr getelementptr inbounds ({{.*}}, ptr @_ZGR6nested_, i64 2, i32 1), align 8
 // CHECK-DYNAMIC-BE: store ptr @_ZGR6nested_,
 // CHECK-DYNAMIC-BE:       ptr @nested, align 8
 // CHECK-DYNAMIC-BE: store ptr getelementptr inbounds ([3 x {{.*}}], ptr @_ZGR6nested_, i64 0, i64 3),

diff  --git a/clang/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp b/clang/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp
index ef05f0334cd75..3d0cf968bfd54 100644
--- a/clang/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp
+++ b/clang/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp
@@ -365,12 +365,12 @@ namespace partly_constant {
   //
   // Second init list.
   // CHECK: store ptr {{.*}}@[[PARTLY_CONSTANT_SECOND]]{{.*}}, ptr getelementptr inbounds ({{.*}}, ptr {{.*}}@[[PARTLY_CONSTANT_INNER]]{{.*}}, i64 1)
-  // CHECK: store i64 2, ptr getelementptr inbounds ({{.*}}, ptr getelementptr inbounds ({{.*}}, ptr {{.*}}@[[PARTLY_CONSTANT_INNER]]{{.*}}, i64 1), i32 0, i32 1)
+  // CHECK: store i64 2, ptr getelementptr inbounds ({{.*}}, ptr {{.*}}@[[PARTLY_CONSTANT_INNER]]{{.*}}, i64 1, i32 1)
   //
   // Third init list.
   // CHECK-NOT: @[[PARTLY_CONSTANT_THIRD]],
   // CHECK: store ptr {{.*}}@[[PARTLY_CONSTANT_THIRD]]{{.*}}, ptr getelementptr inbounds ({{.*}}, ptr {{.*}}@[[PARTLY_CONSTANT_INNER]]{{.*}}, i64 2)
-  // CHECK: store i64 4, ptr getelementptr inbounds ({{.*}}, ptr getelementptr inbounds ({{.*}}, ptr {{.*}}@[[PARTLY_CONSTANT_INNER]]{{.*}}, i64 2), i32 0, i32 1)
+  // CHECK: store i64 4, ptr getelementptr inbounds ({{.*}}, ptr {{.*}}@[[PARTLY_CONSTANT_INNER]]{{.*}}, i64 2, i32 1)
   // CHECK-NOT: @[[PARTLY_CONSTANT_THIRD]],
   //
   // Outer init list.

diff  --git a/clang/test/CodeGenCXX/ms-inline-asm-fields.cpp b/clang/test/CodeGenCXX/ms-inline-asm-fields.cpp
index e3441d0b4614e..403e9c8427760 100644
--- a/clang/test/CodeGenCXX/ms-inline-asm-fields.cpp
+++ b/clang/test/CodeGenCXX/ms-inline-asm-fields.cpp
@@ -24,7 +24,7 @@ extern "C" int test_param_field(A p) {
 
 extern "C" int test_namespace_global() {
 // CHECK: define{{.*}} i32 @test_namespace_global()
-// CHECK: call i32 asm sideeffect inteldialect "mov eax, $1", "{{.*}}"(ptr elementtype(i32) getelementptr inbounds (%"struct.A::B", ptr getelementptr inbounds (%struct.A, ptr @_ZN4asdf8a_globalE, i32 0, i32 2), i32 0, i32 1))
+// CHECK: call i32 asm sideeffect inteldialect "mov eax, $1", "{{.*}}"(ptr elementtype(i32) getelementptr inbounds (%struct.A, ptr @_ZN4asdf8a_globalE, i32 0, i32 2, i32 1))
 // CHECK: ret i32
   __asm mov eax, asdf::a_global.a3.b2
 }

diff  --git a/clang/test/OpenMP/threadprivate_codegen.cpp b/clang/test/OpenMP/threadprivate_codegen.cpp
index 7a6269954d39e..2ee328008cc4b 100644
--- a/clang/test/OpenMP/threadprivate_codegen.cpp
+++ b/clang/test/OpenMP/threadprivate_codegen.cpp
@@ -2586,7 +2586,7 @@ int foobar() {
 // SIMD1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[RES]], align 4
 // SIMD1-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
 // SIMD1-NEXT:    store i32 [[ADD3]], ptr [[RES]], align 4
-// SIMD1-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([3 x %struct.S1], ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1), i64 0, i64 1), align 4
+// SIMD1-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1, i64 1), align 4
 // SIMD1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[RES]], align 4
 // SIMD1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
 // SIMD1-NEXT:    store i32 [[ADD4]], ptr [[RES]], align 4
@@ -2663,7 +2663,7 @@ int foobar() {
 // SIMD1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[RES]], align 4
 // SIMD1-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
 // SIMD1-NEXT:    store i32 [[ADD2]], ptr [[RES]], align 4
-// SIMD1-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([3 x %struct.S1], ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1), i64 0, i64 1), align 4
+// SIMD1-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1, i64 1), align 4
 // SIMD1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[RES]], align 4
 // SIMD1-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
 // SIMD1-NEXT:    store i32 [[ADD3]], ptr [[RES]], align 4
@@ -3052,7 +3052,7 @@ int foobar() {
 // SIMD2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG187:![0-9]+]]
 // SIMD2-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG187]]
 // SIMD2-NEXT:    store i32 [[ADD3]], ptr [[RES]], align 4, !dbg [[DBG187]]
-// SIMD2-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([3 x %struct.S1], ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1), i64 0, i64 1), align 4, !dbg [[DBG188:![0-9]+]]
+// SIMD2-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1, i64 1), align 4, !dbg [[DBG188:![0-9]+]]
 // SIMD2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG189:![0-9]+]]
 // SIMD2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG189]]
 // SIMD2-NEXT:    store i32 [[ADD4]], ptr [[RES]], align 4, !dbg [[DBG189]]
@@ -3133,7 +3133,7 @@ int foobar() {
 // SIMD2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG222:![0-9]+]]
 // SIMD2-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG222]]
 // SIMD2-NEXT:    store i32 [[ADD2]], ptr [[RES]], align 4, !dbg [[DBG222]]
-// SIMD2-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([3 x %struct.S1], ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1), i64 0, i64 1), align 4, !dbg [[DBG223:![0-9]+]]
+// SIMD2-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1, i64 1), align 4, !dbg [[DBG223:![0-9]+]]
 // SIMD2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG224:![0-9]+]]
 // SIMD2-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG224]]
 // SIMD2-NEXT:    store i32 [[ADD3]], ptr [[RES]], align 4, !dbg [[DBG224]]
@@ -5707,7 +5707,7 @@ int foobar() {
 // SIMD3-NEXT:    [[TMP12:%.*]] = load i32, ptr [[RES]], align 4
 // SIMD3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
 // SIMD3-NEXT:    store i32 [[ADD3]], ptr [[RES]], align 4
-// SIMD3-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([3 x %struct.S1], ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1), i64 0, i64 1), align 4
+// SIMD3-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1, i64 1), align 4
 // SIMD3-NEXT:    [[TMP14:%.*]] = load i32, ptr [[RES]], align 4
 // SIMD3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
 // SIMD3-NEXT:    store i32 [[ADD4]], ptr [[RES]], align 4
@@ -5784,7 +5784,7 @@ int foobar() {
 // SIMD3-NEXT:    [[TMP6:%.*]] = load i32, ptr [[RES]], align 4
 // SIMD3-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
 // SIMD3-NEXT:    store i32 [[ADD2]], ptr [[RES]], align 4
-// SIMD3-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([3 x %struct.S1], ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1), i64 0, i64 1), align 4
+// SIMD3-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1, i64 1), align 4
 // SIMD3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[RES]], align 4
 // SIMD3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
 // SIMD3-NEXT:    store i32 [[ADD3]], ptr [[RES]], align 4
@@ -6173,7 +6173,7 @@ int foobar() {
 // SIMD4-NEXT:    [[TMP12:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG187:![0-9]+]]
 // SIMD4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG187]]
 // SIMD4-NEXT:    store i32 [[ADD3]], ptr [[RES]], align 4, !dbg [[DBG187]]
-// SIMD4-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([3 x %struct.S1], ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1), i64 0, i64 1), align 4, !dbg [[DBG188:![0-9]+]]
+// SIMD4-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1, i64 1), align 4, !dbg [[DBG188:![0-9]+]]
 // SIMD4-NEXT:    [[TMP14:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG189:![0-9]+]]
 // SIMD4-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG189]]
 // SIMD4-NEXT:    store i32 [[ADD4]], ptr [[RES]], align 4, !dbg [[DBG189]]
@@ -6254,7 +6254,7 @@ int foobar() {
 // SIMD4-NEXT:    [[TMP6:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG222:![0-9]+]]
 // SIMD4-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG222]]
 // SIMD4-NEXT:    store i32 [[ADD2]], ptr [[RES]], align 4, !dbg [[DBG222]]
-// SIMD4-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([3 x %struct.S1], ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1), i64 0, i64 1), align 4, !dbg [[DBG223:![0-9]+]]
+// SIMD4-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([2 x [3 x %struct.S1]], ptr @arr_x, i64 0, i64 1, i64 1), align 4, !dbg [[DBG223:![0-9]+]]
 // SIMD4-NEXT:    [[TMP8:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG224:![0-9]+]]
 // SIMD4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG224]]
 // SIMD4-NEXT:    store i32 [[ADD3]], ptr [[RES]], align 4, !dbg [[DBG224]]

diff  --git a/llvm/include/llvm/IR/ConstantFold.h b/llvm/include/llvm/IR/ConstantFold.h
index 42043d365b2d3..9b3c8a0e5a632 100644
--- a/llvm/include/llvm/IR/ConstantFold.h
+++ b/llvm/include/llvm/IR/ConstantFold.h
@@ -52,7 +52,7 @@ namespace llvm {
                                           Constant *V2);
   Constant *ConstantFoldCompareInstruction(CmpInst::Predicate Predicate,
                                            Constant *C1, Constant *C2);
-  Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C,
+  Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool InBounds,
                                       std::optional<ConstantRange> InRange,
                                       ArrayRef<Value *> Idxs);
 } // End llvm namespace

diff  --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 00895860ca49b..8b2aa6b9f18b0 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5068,8 +5068,9 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
     return nullptr;
 
   if (!ConstantExpr::isSupportedGetElementPtr(SrcTy))
-    return ConstantFoldGetElementPtr(SrcTy, cast<Constant>(Ptr), std::nullopt,
-                                     Indices);
+    // TODO(gep_nowrap): Pass on the whole GEPNoWrapFlags.
+    return ConstantFoldGetElementPtr(SrcTy, cast<Constant>(Ptr),
+                                     NW.isInBounds(), std::nullopt, Indices);
 
   auto *CE =
       ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ptr), Indices, NW);

diff  --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 34bcf36ec212c..77a833610a3a9 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -1404,7 +1404,35 @@ Constant *llvm::ConstantFoldCompareInstruction(CmpInst::Predicate Predicate,
   return nullptr;
 }
 
+// Combine Indices - If the source pointer to this getelementptr instruction
+// is a getelementptr instruction, combine the indices of the two
+// getelementptr instructions into a single instruction.
+static Constant *foldGEPOfGEP(GEPOperator *GEP, Type *PointeeTy, bool InBounds,
+                              ArrayRef<Value *> Idxs) {
+  if (PointeeTy != GEP->getResultElementType())
+    return nullptr;
+
+  // Leave inrange handling to DL-aware constant folding.
+  if (GEP->getInRange())
+    return nullptr;
+
+  // Only handle simple case with leading zero index. We cannot perform an
+  // actual addition as we don't know the correct index type size to use.
+  Constant *Idx0 = cast<Constant>(Idxs[0]);
+  if (!Idx0->isNullValue())
+    return nullptr;
+
+  SmallVector<Value*, 16> NewIndices;
+  NewIndices.reserve(Idxs.size() + GEP->getNumIndices());
+  NewIndices.append(GEP->idx_begin(), GEP->idx_end());
+  NewIndices.append(Idxs.begin() + 1, Idxs.end());
+  return ConstantExpr::getGetElementPtr(
+      GEP->getSourceElementType(), cast<Constant>(GEP->getPointerOperand()),
+      NewIndices, InBounds && GEP->isInBounds());
+}
+
 Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
+                                          bool InBounds,
                                           std::optional<ConstantRange> InRange,
                                           ArrayRef<Value *> Idxs) {
   if (Idxs.empty()) return C;
@@ -1434,5 +1462,10 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
                      cast<VectorType>(GEPTy)->getElementCount(), C)
                : C;
 
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    if (auto *GEP = dyn_cast<GEPOperator>(CE))
+      if (Constant *C = foldGEPOfGEP(GEP, PointeeTy, InBounds, Idxs))
+        return C;
+
   return nullptr;
 }

diff  --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index d07907372f0e4..a76be441875a1 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -2438,7 +2438,8 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
   assert(Ty && "Must specify element type");
   assert(isSupportedGetElementPtr(Ty) && "Element type is unsupported!");
 
-  if (Constant *FC = ConstantFoldGetElementPtr(Ty, C, InRange, Idxs))
+  if (Constant *FC =
+          ConstantFoldGetElementPtr(Ty, C, NW.isInBounds(), InRange, Idxs))
     return FC; // Fold a few common cases.
 
   assert(GetElementPtrInst::getIndexedType(Ty, Idxs) && "GEP indices invalid!");


        


More information about the cfe-commits mailing list