[llvm] 4b3ba64 - [SCEVExpander] Clear flags when reusing GEP (#109293)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 1 05:22:58 PDT 2024


Author: Nikita Popov
Date: 2024-10-01T14:22:54+02:00
New Revision: 4b3ba64ba71c06b6bc9db347a66a7316f5edbcc4

URL: https://github.com/llvm/llvm-project/commit/4b3ba64ba71c06b6bc9db347a66a7316f5edbcc4
DIFF: https://github.com/llvm/llvm-project/commit/4b3ba64ba71c06b6bc9db347a66a7316f5edbcc4.diff

LOG: [SCEVExpander] Clear flags when reusing GEP (#109293)

As pointed out in the review of #102133, SCEVExpander currently
incorrectly reuses GEP instructions that have poison-generating flags
set. Fix this by clearing the flags on the reused instruction.

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
    llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
    llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
    llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll
    llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
    llvm/unittests/Transforms/Utils/ScalarEvolutionExpanderTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 62c1e15a9a60e1..0af3efeacd040c 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -47,6 +47,7 @@ struct PoisonFlags {
   unsigned Exact : 1;
   unsigned Disjoint : 1;
   unsigned NNeg : 1;
+  GEPNoWrapFlags GEPNW;
 
   PoisonFlags(const Instruction *I);
   void apply(Instruction *I);

diff  --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 0927a3015818fd..1088547e1f3efe 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -49,6 +49,7 @@ PoisonFlags::PoisonFlags(const Instruction *I) {
   Exact = false;
   Disjoint = false;
   NNeg = false;
+  GEPNW = GEPNoWrapFlags::none();
   if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
     NUW = OBO->hasNoUnsignedWrap();
     NSW = OBO->hasNoSignedWrap();
@@ -63,6 +64,8 @@ PoisonFlags::PoisonFlags(const Instruction *I) {
     NUW = TI->hasNoUnsignedWrap();
     NSW = TI->hasNoSignedWrap();
   }
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+    GEPNW = GEP->getNoWrapFlags();
 }
 
 void PoisonFlags::apply(Instruction *I) {
@@ -80,6 +83,8 @@ void PoisonFlags::apply(Instruction *I) {
     I->setHasNoUnsignedWrap(NUW);
     I->setHasNoSignedWrap(NSW);
   }
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+    GEP->setNoWrapFlags(GEPNW);
 }
 
 /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
@@ -370,11 +375,15 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
       // generated code.
       if (isa<DbgInfoIntrinsic>(IP))
         ScanLimit++;
-      if (IP->getOpcode() == Instruction::GetElementPtr &&
-          IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
-          cast<GEPOperator>(&*IP)->getSourceElementType() ==
-              Builder.getInt8Ty())
-        return &*IP;
+      if (auto *GEP = dyn_cast<GetElementPtrInst>(IP)) {
+        if (GEP->getPointerOperand() == V &&
+            GEP->getSourceElementType() == Builder.getInt8Ty() &&
+            GEP->getOperand(1) == Idx) {
+          rememberFlags(GEP);
+          GEP->setNoWrapFlags(GEPNoWrapFlags::none());
+          return &*IP;
+        }
+      }
       if (IP == BlockBegin) break;
     }
   }

diff  --git a/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
index d4518d40e42986..75612ba645ca4d 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
@@ -9,19 +9,21 @@ target triple = "wasm32-unknown-unknown"
 define void @shl_loop(ptr %a, i8 %shift, i32 %count) {
 ; CHECK-LABEL: shl_loop:
 ; CHECK:         .functype shl_loop (i32, i32, i32) -> ()
+; CHECK-NEXT:    .local i32
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:  .LBB0_1: # %body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    loop # label0:
 ; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i32.const 16
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    local.tee 3
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    v128.load 0:p2align=0
 ; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    i8x16.shl
-; CHECK-NEXT:    v128.store 16
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i32.const 16
-; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.store 0
+; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    local.set 0
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    i32.const -1
@@ -56,23 +58,25 @@ exit:
 define void @shl_phi_loop(ptr %a, i8 %shift, i32 %count) {
 ; CHECK-LABEL: shl_phi_loop:
 ; CHECK:         .functype shl_phi_loop (i32, i32, i32) -> ()
+; CHECK-NEXT:    .local i32
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:  .LBB1_1: # %body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    loop # label1:
 ; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i32.const 16
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    local.tee 3
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    v128.load 0:p2align=0
 ; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    i8x16.shl
-; CHECK-NEXT:    v128.store 16
+; CHECK-NEXT:    v128.store 0
 ; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    i32.const 1
 ; CHECK-NEXT:    i32.and
 ; CHECK-NEXT:    local.set 1
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i32.const 16
-; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    local.get 3
 ; CHECK-NEXT:    local.set 0
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    i32.const -1

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll
index 5e72e13a26edb9..8f1c95fd4a330b 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll
@@ -79,11 +79,11 @@ define void @lsr_crash_preserve_addrspace_unknown_type2(ptr addrspace(5) %array,
 ; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_BODY]]:
 ; CHECK-NEXT:    [[J:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
-; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[ARRAY2]], i32 [[J]]
-; CHECK-NEXT:    [[T:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
+; CHECK-NEXT:    [[IDX:%.*]] = getelementptr i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr i8, ptr addrspace(3) [[ARRAY2]], i32 [[J]]
+; CHECK-NEXT:    [[T:%.*]] = getelementptr i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
 ; CHECK-NEXT:    [[N8:%.*]] = load i8, ptr addrspace(5) [[T]], align 4
-; CHECK-NEXT:    [[N7:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[T]], i32 42
+; CHECK-NEXT:    [[N7:%.*]] = getelementptr i8, ptr addrspace(5) [[T]], i32 42
 ; CHECK-NEXT:    [[N9:%.*]] = load i8, ptr addrspace(5) [[N7]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[J]], 42
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[IF_THEN17:.*]], label %[[FOR_INC]]

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
index 745b54e2bdc642..1709ec1086042f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
@@ -25,7 +25,7 @@ define ptr @negativeOneCase(ptr returned %a, ptr nocapture readonly %b, i32 %n)
 ; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
 ; CHECK:       while.cond:
 ; CHECK-NEXT:    [[P_0:%.*]] = phi ptr [ [[ADD_PTR]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_0]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr i8, ptr [[P_0]], i32 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_COND2_PREHEADER:%.*]], label [[WHILE_COND]]

diff  --git a/llvm/unittests/Transforms/Utils/ScalarEvolutionExpanderTest.cpp b/llvm/unittests/Transforms/Utils/ScalarEvolutionExpanderTest.cpp
index 4d6111cc257c3b..13bd69624867e9 100644
--- a/llvm/unittests/Transforms/Utils/ScalarEvolutionExpanderTest.cpp
+++ b/llvm/unittests/Transforms/Utils/ScalarEvolutionExpanderTest.cpp
@@ -954,4 +954,36 @@ TEST_F(ScalarEvolutionExpanderTest, ExpandNonIntegralPtrWithNullBase) {
   });
 }
 
+TEST_F(ScalarEvolutionExpanderTest, GEPFlags) {
+  LLVMContext C;
+  SMDiagnostic Err;
+  StringRef ModStr = R"(
+  define void @f(ptr %p, i64 %x) {
+    %gep_inbounds = getelementptr inbounds i8, ptr %p, i64 %x
+    ret void
+  })";
+  std::unique_ptr<Module> M = parseAssemblyString(ModStr, Err, C);
+
+  assert(M && "Could not parse module?");
+  assert(!verifyModule(*M) && "Must have been well formed!");
+
+  Function *F = M->getFunction("f");
+  ASSERT_NE(F, nullptr) << "Could not find function 'f'";
+  BasicBlock &Entry = F->getEntryBlock();
+  auto *GEP = cast<GetElementPtrInst>(&Entry.front());
+
+  ScalarEvolution SE = buildSE(*F);
+  const SCEV *Ptr = SE.getSCEV(F->getArg(0));
+  const SCEV *X = SE.getSCEV(F->getArg(1));
+  const SCEV *PtrX = SE.getAddExpr(Ptr, X);
+
+  SCEVExpander Exp(SE, M->getDataLayout(), "expander");
+  auto *I = cast<Instruction>(
+      Exp.expandCodeFor(PtrX, nullptr, Entry.getTerminator()));
+  // Check that the GEP is reused, but the inbounds flag cleared. We don't
+  // know that the newly introduced use is inbounds.
+  EXPECT_EQ(I, GEP);
+  EXPECT_EQ(GEP->getNoWrapFlags(), GEPNoWrapFlags::none());
+}
+
 } // end namespace llvm


        


More information about the llvm-commits mailing list