[llvm] 37d3030 - [ValueTracking, BasicAA] Don't simplify instructions

Sun Jun 21 07:31:25 PDT 2020

Author: Nikita Popov
Date: 2020-06-21T16:31:07+02:00
New Revision: 37d3030711cc30564fb142154e4e8cabdc91724e

URL: https://github.com/llvm/llvm-project/commit/37d3030711cc30564fb142154e4e8cabdc91724e
DIFF: https://github.com/llvm/llvm-project/commit/37d3030711cc30564fb142154e4e8cabdc91724e.diff

LOG: [ValueTracking, BasicAA] Don't simplify instructions

GetUnderlyingObject() (and by required symmetry
DecomposeGEPExpression()) will call SimplifyInstruction() on the
passed value if other checks fail. This simplification is very
expensive, but has little effect in practice. This patch removes
the SimplifyInstruction call(), and replaces it with a check for
single-argument phis (which can occur in canonical IR in LCSSA
form), which is the only useful simplification case I was able to
identify.

At O3 the geomean CTMark improvement is -1.7%. The largest
improvement is SPASS with ThinLTO at -6%.

In test-suite, I see only two tests with a hash difference and
no code size difference (PAQ8p, Ptrdist), which indicates that
the simplification only ends up being useful very rarely. (I would
have liked to figure out which simplification is responsible here,
but wasn't able to spot it looking at transformation logs.)

The AMDGPU test case that is update was using two selects with
undef condition, in which case GetUnderlyingObject will return
the first select operand as the underlying object. This will of
course not happen with non-undef conditions, so this was not
testing anything realistic. Additionally this illustrates potential
unsoundness: While GetUnderlyingObject will pick the first operand,
the select might be later replaced by the second operand, resulting
in inconsistent assumptions about the undef value.

Differential Revision: https://reviews.llvm.org/D82261

Added: 
    

Modified: 
    llvm/lib/Analysis/BasicAliasAnalysis.cpp
    llvm/lib/Analysis/ValueTracking.cpp
    llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 5fdfb7d49073..8149154cd3d9 100644

--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -492,7 +492,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
 
     const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
     if (!GEPOp) {
-      if (const auto *Call = dyn_cast<CallBase>(V)) {
+      if (const auto *PHI = dyn_cast<PHINode>(V)) {
+        // Look through single-arg phi nodes created by LCSSA.
+        if (PHI->getNumIncomingValues() == 1) {
+          V = PHI->getIncomingValue(0);
+          continue;
+        }
+      } else if (const auto *Call = dyn_cast<CallBase>(V)) {
         // CaptureTracking can know about special capturing properties of some
         // intrinsics like launder.invariant.group, that can't be expressed with
         // the attributes, but have properties like returning aliasing pointer.
@@ -508,19 +514,6 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
         }
       }
 
-      // If it's not a GEP, hand it off to SimplifyInstruction to see if it
-      // can come up with something. This matches what GetUnderlyingObject does.
-      if (const Instruction *I = dyn_cast<Instruction>(V))
-        // TODO: Get a DominatorTree and AssumptionCache and use them here
-        // (these are both now available in this function, but this should be
-        // updated when GetUnderlyingObject is updated). TLI should be
-        // provided also.
-        if (const Value *Simplified =
-                SimplifyInstruction(const_cast<Instruction *>(I), DL)) {
-          V = Simplified;
-          continue;
-        }
-
       Decomposed.Base = V;
       return false;
     }

diff  --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index d01889e37aa5..927add44dda3 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4151,11 +4151,14 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
       if (GA->isInterposable())
         return V;
       V = GA->getAliasee();
-    } else if (isa<AllocaInst>(V)) {
-      // An alloca can't be further simplified.
-      return V;
     } else {
-      if (auto *Call = dyn_cast<CallBase>(V)) {
+      if (auto *PHI = dyn_cast<PHINode>(V)) {
+        // Look through single-arg phi nodes created by LCSSA.
+        if (PHI->getNumIncomingValues() == 1) {
+          V = PHI->getIncomingValue(0);
+          continue;
+        }
+      } else if (auto *Call = dyn_cast<CallBase>(V)) {
         // CaptureTracking can know about special capturing properties of some
         // intrinsics like launder.invariant.group, that can't be expressed with
         // the attributes, but have properties like returning aliasing pointer.
@@ -4171,14 +4174,6 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
         }
       }
 
-      // See if InstructionSimplify knows any relevant tricks.
-      if (Instruction *I = dyn_cast<Instruction>(V))
-        // TODO: Acquire a DominatorTree and AssumptionCache and use them.
-        if (Value *Simplified = SimplifyInstruction(I, {DL, I})) {
-          V = Simplified;
-          continue;
-        }
-
       return V;
     }
     assert(V->getType()->isPointerTy() && "Unexpected operand type!");

diff  --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
index 007cc6f3c988..ca261afdce4a 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
@@ -61,21 +61,18 @@ define amdgpu_kernel void @lds_promote_alloca_select_two_derived_constant_pointe
   ret void
 }
 
+; FIXME: Can be promoted, but we'd have to recursively show that the select
+; operands all point to the same alloca.
+
 ; CHECK-LABEL: @lds_promoted_alloca_select_input_select(
-; CHECK: getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_select_input_select.alloca, i32 0, i32 %{{[0-9]+}}
-; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
-; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %b
-; CHECK: %ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %c
-; CHECK: %select0 = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
-; CHECK: %select1 = select i1 undef, i32 addrspace(3)* %select0, i32 addrspace(3)* %ptr2
-; CHECK: store i32 0, i32 addrspace(3)* %select1, align 4
-define amdgpu_kernel void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c) #0 {
+; CHECK: alloca
+define amdgpu_kernel void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c, i1 %c1, i1 %c2) #0 {
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
   %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %b
   %ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %c
-  %select0 = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
-  %select1 = select i1 undef, i32 addrspace(5)* %select0, i32 addrspace(5)* %ptr2
+  %select0 = select i1 %c1, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
+  %select1 = select i1 %c2, i32 addrspace(5)* %select0, i32 addrspace(5)* %ptr2
   store i32 0, i32 addrspace(5)* %select1, align 4
   ret void
 }