[llvm] r355217 - [LICM] Infer proper alignment from loads during scalar promotion
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 1 10:45:05 PST 2019
Author: reames
Date: Fri Mar 1 10:45:05 2019
New Revision: 355217
URL: http://llvm.org/viewvc/llvm-project?rev=355217&view=rev
Log:
[LICM] Infer proper alignment from loads during scalar promotion
This patch fixes an issue where we would compute an unnecessarily small alignment during scalar promotion when no store is guaranteed to execute, but we've proven load speculation safety. Since speculating a load requires proving the existing alignment is valid at the new location (see Loads.cpp), we can use the alignment fact from the load.
For non-atomics, this is a performance problem. For atomics, this is a correctness issue, though an *incredibly* rare one to see in practice. For atomics, we might not be able to lower an improperly aligned load or store (i.e. i32 align 1). If such an instruction makes it all the way to codegen, we *may* fail to codegen the operation, or we may simply generate a slow call to a library function. The part that makes this super hard to see in practice is that the memory location actually *is* well aligned, and instcombine knows that. So, to see a failure, you have to have a) hit the bug in LICM, b) somehow hit a depth limit in InstCombine/ValueTracking to avoid fixing the alignment, and c) then have generated an instruction which fails codegen rather than simply emitting a slow libcall. All around, pretty hard to hit.
Differential Revision: https://reviews.llvm.org/D58809
Modified:
llvm/trunk/lib/Transforms/Scalar/LICM.cpp
llvm/trunk/test/Transforms/LICM/promote-tls.ll
llvm/trunk/test/Transforms/LICM/scalar-promote-unwind.ll
Modified: llvm/trunk/lib/Transforms/Scalar/LICM.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LICM.cpp?rev=355217&r1=355216&r2=355217&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LICM.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp Fri Mar 1 10:45:05 2019
@@ -1924,9 +1924,21 @@ bool llvm::promoteLoopAccessesToScalars(
SawUnorderedAtomic |= Load->isAtomic();
SawNotAtomic |= !Load->isAtomic();
- if (!DereferenceableInPH)
- DereferenceableInPH = isSafeToExecuteUnconditionally(
- *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator());
+ unsigned InstAlignment = Load->getAlignment();
+ if (!InstAlignment)
+ InstAlignment =
+ MDL.getABITypeAlignment(Load->getType());
+
+ // Note that proving a load safe to speculate requires proving
+ // sufficient alignment at the target location. Proving it guaranteed
+ // to execute does as well. Thus we can increase our guaranteed
+ // alignment as well.
+ if (!DereferenceableInPH || (InstAlignment > Alignment))
+ if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
+ ORE, Preheader->getTerminator())) {
+ DereferenceableInPH = true;
+ Alignment = std::max(Alignment, InstAlignment);
+ }
} else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
@@ -1997,6 +2009,14 @@ bool llvm::promoteLoopAccessesToScalars(
if (SawUnorderedAtomic && SawNotAtomic)
return false;
+ // If we're inserting an atomic load in the preheader, we must be able to
+ // lower it. We're only guaranteed to be able to lower naturally aligned
+ // atomics.
+ auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
+ if (SawUnorderedAtomic &&
+ Alignment < MDL.getTypeStoreSize(SomePtrElemType))
+ return false;
+
// If we couldn't prove we can hoist the load, bail.
if (!DereferenceableInPH)
return false;
Modified: llvm/trunk/test/Transforms/LICM/promote-tls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LICM/promote-tls.ll?rev=355217&r1=355216&r2=355217&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LICM/promote-tls.ll (original)
+++ llvm/trunk/test/Transforms/LICM/promote-tls.ll Fri Mar 1 10:45:05 2019
@@ -22,7 +22,7 @@ entry:
for.body.lr.ph: ; preds = %entry
; CHECK-LABEL: for.body.lr.ph:
-; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 1
+; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 4
br label %for.header
for.header:
@@ -35,7 +35,7 @@ for.header:
early-exit:
; CHECK-LABEL: early-exit:
-; CHECK: store i32 %new1.lcssa, i32* %addr, align 1
+; CHECK: store i32 %new1.lcssa, i32* %addr, align 4
ret i32* null
for.body:
@@ -47,7 +47,7 @@ for.body:
for.cond.for.end_crit_edge: ; preds = %for.body
; CHECK-LABEL: for.cond.for.end_crit_edge:
-; CHECK: store i32 %new.lcssa, i32* %addr, align 1
+; CHECK: store i32 %new.lcssa, i32* %addr, align 4
%split = phi i32* [ %addr, %for.body ]
ret i32* null
}
@@ -62,7 +62,7 @@ entry:
for.body.lr.ph: ; preds = %entry
; CHECK-LABEL: for.body.lr.ph:
-; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 1
+; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 4
br label %for.header
for.header:
@@ -75,7 +75,7 @@ for.header:
early-exit:
; CHECK-LABEL: early-exit:
-; CHECK: store i32 %new1.lcssa, i32* %addr, align 1
+; CHECK: store i32 %new1.lcssa, i32* %addr, align 4
ret i32* null
for.body:
@@ -87,7 +87,7 @@ for.body:
for.cond.for.end_crit_edge: ; preds = %for.body
; CHECK-LABEL: for.cond.for.end_crit_edge:
-; CHECK: store i32 %new.lcssa, i32* %addr, align 1
+; CHECK: store i32 %new.lcssa, i32* %addr, align 4
%split = phi i32* [ %addr, %for.body ]
ret i32* null
}
Modified: llvm/trunk/test/Transforms/LICM/scalar-promote-unwind.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LICM/scalar-promote-unwind.ll?rev=355217&r1=355216&r2=355217&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LICM/scalar-promote-unwind.ll (original)
+++ llvm/trunk/test/Transforms/LICM/scalar-promote-unwind.ll Fri Mar 1 10:45:05 2019
@@ -74,7 +74,7 @@ define void @test3(i1 zeroext %y) uwtabl
entry:
; CHECK-LABEL: entry:
; CHECK-NEXT: %a = alloca i32
-; CHECK-NEXT: %a.promoted = load i32, i32* %a, align 1
+; CHECK-NEXT: %a.promoted = load i32, i32* %a, align 4
%a = alloca i32
br label %for.body
@@ -90,20 +90,18 @@ for.body:
for.cond.cleanup:
; CHECK-LABEL: for.cond.cleanup:
-; CHECK: store i32 %add.lcssa, i32* %a, align 1
+; CHECK: store i32 %add.lcssa, i32* %a, align 4
; CHECK-NEXT: ret void
ret void
}
;; Same as test3, but with unordered atomics
-;; FIXME: doing the transform w/o alignment here is wrong since we're
-;; creating an unaligned atomic which we may not be able to lower.
define void @test3b(i1 zeroext %y) uwtable {
; CHECK-LABEL: @test3
entry:
; CHECK-LABEL: entry:
; CHECK-NEXT: %a = alloca i32
-; CHECK-NEXT: %a.promoted = load atomic i32, i32* %a unordered, align 1
+; CHECK-NEXT: %a.promoted = load atomic i32, i32* %a unordered, align 4
%a = alloca i32
br label %for.body
@@ -119,7 +117,7 @@ for.body:
for.cond.cleanup:
; CHECK-LABEL: for.cond.cleanup:
-; CHECK: store atomic i32 %add.lcssa, i32* %a unordered, align 1
+; CHECK: store atomic i32 %add.lcssa, i32* %a unordered, align 4
; CHECK-NEXT: ret void
ret void
}
More information about the llvm-commits
mailing list