[llvm] e671641 - [GC] Remove buggy untested optimization from statepoint lowering

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 11 10:03:31 PDT 2020


Author: Philip Reames
Date: 2020-03-11T10:03:24-07:00
New Revision: e6716418442bf178774ea2ec1d34d3d6a5e06b88

URL: https://github.com/llvm/llvm-project/commit/e6716418442bf178774ea2ec1d34d3d6a5e06b88
DIFF: https://github.com/llvm/llvm-project/commit/e6716418442bf178774ea2ec1d34d3d6a5e06b88.diff

LOG: [GC] Remove buggy untested optimization from statepoint lowering

A downstream test case (see included reduced test) revealed that we have a bug in how we handle duplicate relocations. If we have the same SDValue relocated twice, and that value happens to be a constant (such as null), we only export one of the two llvm::Values. Exporting on a per llvm::Value basis is required to allow lowering of gc.relocates in following basic blocks (e.g. invokes). Without it, we end up with a use of an undefined vreg and bad things happen.

Rather than fixing the optimization - which appears to be hard - I propose we simply remove it. There are no tests in tree that change with this code removed. If we find out later that this did matter for something, we can reimplement a variation of this in CodeGenPrepare to catch the easy cases without complicating the lowering code.

Thanks to Denis and Serguei who did all the hard work of figuring out what went wrong here. The patch is by far the easy part. :)

Differential Revision: https://reviews.llvm.org/D75964

Added: 
    llvm/test/CodeGen/X86/statepoint-duplicates-export.ll

Modified: 
    llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
    llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index 2d41f90fe053..cbb3e28485db 100644
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -92,35 +92,12 @@ class FunctionLoweringInfo {
   DenseMap<const Value *, unsigned> CatchPadExceptionPointers;
 
   /// Keep track of frame indices allocated for statepoints as they could be
-  /// used across basic block boundaries.  This struct is more complex than a
-  /// simple map because the stateopint lowering code de-duplicates gc pointers
-  /// based on their SDValue (so %p and (bitcast %p to T) will get the same
-  /// slot), and we track that here.
-
-  struct StatepointSpillMap {
-    using SlotMapTy = DenseMap<const Value *, Optional<int>>;
-
-    /// Maps uniqued llvm IR values to the slots they were spilled in.  If a
-    /// value is mapped to None it means we visited the value but didn't spill
-    /// it (because it was a constant, for instance).
-    SlotMapTy SlotMap;
-
-    /// Maps llvm IR values to the values they were de-duplicated to.
-    DenseMap<const Value *, const Value *> DuplicateMap;
-
-    SlotMapTy::const_iterator find(const Value *V) const {
-      auto DuplIt = DuplicateMap.find(V);
-      if (DuplIt != DuplicateMap.end())
-        V = DuplIt->second;
-      return SlotMap.find(V);
-    }
-
-    SlotMapTy::const_iterator end() const { return SlotMap.end(); }
-  };
-
-  /// Maps gc.statepoint instructions to their corresponding StatepointSpillMap
-  /// instances.
-  DenseMap<const Instruction *, StatepointSpillMap> StatepointSpillMaps;
+  /// used across basic block boundaries (e.g. for an invoke).  For each
+  /// gc.statepoint instruction, maps uniqued llvm IR values to the slots they
+  /// were spilled in.  If a value is mapped to None it means we visited the
+  /// value but didn't spill it (because it was a constant, for instance). 
+  using StatepointSpillMapTy = DenseMap<const Value *, Optional<int>>;
+  DenseMap<const Instruction *, StatepointSpillMapTy> StatepointSpillMaps;
 
   /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
   /// the entry block.  This allows the allocas to be efficiently referenced

diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 881288a84658..8301489df9a4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -268,45 +268,6 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
   Builder.StatepointLowering.setLocation(Incoming, Loc);
 }
 
-/// Remove any duplicate (as SDValues) from the derived pointer pairs.  This
-/// is not required for correctness.  It's purpose is to reduce the size of
-/// StackMap section.  It has no effect on the number of spill slots required
-/// or the actual lowering.
-static void
-removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases,
-                      SmallVectorImpl<const Value *> &Ptrs,
-                      SmallVectorImpl<const GCRelocateInst *> &Relocs,
-                      SelectionDAGBuilder &Builder,
-                      FunctionLoweringInfo::StatepointSpillMap &SSM) {
-  DenseMap<SDValue, const Value *> Seen;
-
-  SmallVector<const Value *, 64> NewBases, NewPtrs;
-  SmallVector<const GCRelocateInst *, 64> NewRelocs;
-  for (size_t i = 0, e = Ptrs.size(); i < e; i++) {
-    SDValue SD = Builder.getValue(Ptrs[i]);
-    auto SeenIt = Seen.find(SD);
-
-    if (SeenIt == Seen.end()) {
-      // Only add non-duplicates
-      NewBases.push_back(Bases[i]);
-      NewPtrs.push_back(Ptrs[i]);
-      NewRelocs.push_back(Relocs[i]);
-      Seen[SD] = Ptrs[i];
-    } else {
-      // Duplicate pointer found, note in SSM and move on:
-      SSM.DuplicateMap[Ptrs[i]] = SeenIt->second;
-    }
-  }
-  assert(Bases.size() >= NewBases.size());
-  assert(Ptrs.size() >= NewPtrs.size());
-  assert(Relocs.size() >= NewRelocs.size());
-  Bases = NewBases;
-  Ptrs = NewPtrs;
-  Relocs = NewRelocs;
-  assert(Ptrs.size() == Bases.size());
-  assert(Ptrs.size() == Relocs.size());
-}
-
 /// Extract call from statepoint, lower it and return pointer to the
 /// call node. Also update NodeMap so that getValue(statepoint) will
 /// reference lowered call result
@@ -610,7 +571,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
     SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
 
     if (Loc.getNode()) {
-      SpillMap.SlotMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
+      SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
     } else {
       // Record value as visited, but not spilled. This is case for allocas
       // and constants. For this values we can avoid emitting spill load while
@@ -618,7 +579,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
       // Actually we do not need to record them in this map at all.
       // We do this only to check that we are not relocating any unvisited
       // value.
-      SpillMap.SlotMap[V] = None;
+      SpillMap[V] = None;
 
      // Default llvm mechanisms for exporting values which are used in
      // different basic blocks does not work for gc relocates.
@@ -641,24 +602,15 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
   NumOfStatepoints++;
   // Clear state
   StatepointLowering.startNewStatepoint(*this);
+  assert(SI.Bases.size() == SI.Ptrs.size() &&
+         SI.Ptrs.size() == SI.GCRelocates.size());
 
 #ifndef NDEBUG
-  // We schedule gc relocates before removeDuplicateGCPtrs since we _will_
-  // encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs.
   for (auto *Reloc : SI.GCRelocates)
     if (Reloc->getParent() == SI.StatepointInstr->getParent())
       StatepointLowering.scheduleRelocCall(*Reloc);
 #endif
 
-  // Remove any redundant llvm::Values which map to the same SDValue as another
-  // input.  Also has the effect of removing duplicates in the original
-  // llvm::Value input list as well.  This is a useful optimization for
-  // reducing the size of the StackMap section.  It has no other impact.
-  removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this,
-                        FuncInfo.StatepointSpillMaps[SI.StatepointInstr]);
-  assert(SI.Bases.size() == SI.Ptrs.size() &&
-         SI.Ptrs.size() == SI.GCRelocates.size());
-
   // Lower statepoint vmstate and gcstate arguments
   SmallVector<SDValue, 10> LoweredMetaArgs;
   SmallVector<MachineMemOperand*, 16> MemRefs;

diff --git a/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll b/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll
new file mode 100644
index 000000000000..ccc0d61b3c67
--- /dev/null
+++ b/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc  -verify-machineinstrs < %s | FileCheck %s
+
+; Check that we can export values of "duplicated" gc.relocates without a crash
+; "duplicate" here means maps to same SDValue.  We previously had an
+; optimization which tried to reduce number of spills/stackmap entries in such
+; cases, but it incorrectly handled exports across basis block boundaries
+; leading to a crash in this case.
+
+target triple = "x86_64-pc-linux-gnu"
+
+declare void @func()
+
+define i1 @test() gc "statepoint-example" {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    callq func
+; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:    callq func
+; CHECK-NEXT:  .Ltmp1:
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* null, i32 addrspace(1)* null)
+  %base = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 7, i32 7)
+  %derived = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 7, i32 8)
+  %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %base, i32 addrspace(1)* %derived)
+  br label %next
+
+next:
+  %base_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2,  i32 7, i32 7)
+  %derived_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2,  i32 7, i32 8)
+  %cmp1 = icmp eq i32 addrspace(1)* %base_reloc, null
+  %cmp2 = icmp eq i32 addrspace(1)* %derived_reloc, null
+  %cmp = and i1 %cmp1, %cmp2
+  ret i1 %cmp
+}
+
+; Showing redundant fill on first statepoint and spill/fill on second one
+define i1 @test2(i32 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    callq func
+; CHECK-NEXT:  .Ltmp2:
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    callq func
+; CHECK-NEXT:  .Ltmp3:
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    orq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %arg, i32 addrspace(1)* %arg)
+  %base = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 7, i32 7)
+  %derived = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 7, i32 8)
+  %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %base, i32 addrspace(1)* %derived)
+  br label %next
+
+next:
+  %base_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2,  i32 7, i32 7)
+  %derived_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2,  i32 7, i32 8)
+  %cmp1 = icmp eq i32 addrspace(1)* %base_reloc, null
+  %cmp2 = icmp eq i32 addrspace(1)* %derived_reloc, null
+  %cmp = and i1 %cmp1, %cmp2
+  ret i1 %cmp
+}
+
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)


        


More information about the llvm-commits mailing list