[llvm] [SCEVExpander] Clear flags when reusing GEP (PR #109293)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 19 07:54:32 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Nikita Popov (nikic)
<details>
<summary>Changes</summary>
As pointed out in the review of #102133, SCEVExpander currently incorrectly reuses GEP instructions that have poison-generating flags set. Fix this by clearing the flags on the reused instruction.
---
Full diff: https://github.com/llvm/llvm-project/pull/109293.diff
5 Files Affected:
- (modified) llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h (+1)
- (modified) llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp (+14-5)
- (modified) llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll (+12-8)
- (modified) llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll (+4-4)
- (modified) llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll (+1-1)
``````````diff
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 62c1e15a9a60e1..0af3efeacd040c 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -47,6 +47,7 @@ struct PoisonFlags {
unsigned Exact : 1;
unsigned Disjoint : 1;
unsigned NNeg : 1;
+ GEPNoWrapFlags GEPNW;
PoisonFlags(const Instruction *I);
void apply(Instruction *I);
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 0927a3015818fd..1088547e1f3efe 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -49,6 +49,7 @@ PoisonFlags::PoisonFlags(const Instruction *I) {
Exact = false;
Disjoint = false;
NNeg = false;
+ GEPNW = GEPNoWrapFlags::none();
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
NUW = OBO->hasNoUnsignedWrap();
NSW = OBO->hasNoSignedWrap();
@@ -63,6 +64,8 @@ PoisonFlags::PoisonFlags(const Instruction *I) {
NUW = TI->hasNoUnsignedWrap();
NSW = TI->hasNoSignedWrap();
}
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ GEPNW = GEP->getNoWrapFlags();
}
void PoisonFlags::apply(Instruction *I) {
@@ -80,6 +83,8 @@ void PoisonFlags::apply(Instruction *I) {
I->setHasNoUnsignedWrap(NUW);
I->setHasNoSignedWrap(NSW);
}
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ GEP->setNoWrapFlags(GEPNW);
}
/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
@@ -370,11 +375,15 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
// generated code.
if (isa<DbgInfoIntrinsic>(IP))
ScanLimit++;
- if (IP->getOpcode() == Instruction::GetElementPtr &&
- IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
- cast<GEPOperator>(&*IP)->getSourceElementType() ==
- Builder.getInt8Ty())
- return &*IP;
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(IP)) {
+ if (GEP->getPointerOperand() == V &&
+ GEP->getSourceElementType() == Builder.getInt8Ty() &&
+ GEP->getOperand(1) == Idx) {
+ rememberFlags(GEP);
+ GEP->setNoWrapFlags(GEPNoWrapFlags::none());
+ return &*IP;
+ }
+ }
if (IP == BlockBegin) break;
}
}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
index d4518d40e42986..75612ba645ca4d 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll
@@ -9,19 +9,21 @@ target triple = "wasm32-unknown-unknown"
define void @shl_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_loop:
; CHECK: .functype shl_loop (i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: .LBB0_1: # %body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: loop # label0:
; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.tee 3
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shl
-; CHECK-NEXT: v128.store 16
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.const 16
-; CHECK-NEXT: i32.add
+; CHECK-NEXT: v128.store 0
+; CHECK-NEXT: local.get 3
; CHECK-NEXT: local.set 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i32.const -1
@@ -56,23 +58,25 @@ exit:
define void @shl_phi_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_phi_loop:
; CHECK: .functype shl_phi_loop (i32, i32, i32) -> ()
+; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: .LBB1_1: # %body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: loop # label1:
; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.tee 3
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.shl
-; CHECK-NEXT: v128.store 16
+; CHECK-NEXT: v128.store 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: local.set 1
-; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i32.const 16
-; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 3
; CHECK-NEXT: local.set 0
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i32.const -1
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll
index 5e72e13a26edb9..8f1c95fd4a330b 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll
@@ -79,11 +79,11 @@ define void @lsr_crash_preserve_addrspace_unknown_type2(ptr addrspace(5) %array,
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
-; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[ARRAY2]], i32 [[J]]
-; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
+; CHECK-NEXT: [[IDX:%.*]] = getelementptr i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
+; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i8, ptr addrspace(3) [[ARRAY2]], i32 [[J]]
+; CHECK-NEXT: [[T:%.*]] = getelementptr i8, ptr addrspace(5) [[ARRAY]], i32 [[J]]
; CHECK-NEXT: [[N8:%.*]] = load i8, ptr addrspace(5) [[T]], align 4
-; CHECK-NEXT: [[N7:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[T]], i32 42
+; CHECK-NEXT: [[N7:%.*]] = getelementptr i8, ptr addrspace(5) [[T]], i32 42
; CHECK-NEXT: [[N9:%.*]] = load i8, ptr addrspace(5) [[N7]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[J]], 42
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN17:.*]], label %[[FOR_INC]]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
index 745b54e2bdc642..1709ec1086042f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
@@ -25,7 +25,7 @@ define ptr @negativeOneCase(ptr returned %a, ptr nocapture readonly %b, i32 %n)
; CHECK-NEXT: br label [[WHILE_COND:%.*]]
; CHECK: while.cond:
; CHECK-NEXT: [[P_0:%.*]] = phi ptr [ [[ADD_PTR]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_0]], i32 1
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr i8, ptr [[P_0]], i32 1
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND2_PREHEADER:%.*]], label [[WHILE_COND]]
``````````
</details>
https://github.com/llvm/llvm-project/pull/109293
More information about the llvm-commits mailing list