[llvm] 03cdb52 - [GlobalISel] Fix load-or combine moving loads across potential aliasing stores.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 19 10:23:29 PDT 2021


Author: Amara Emerson
Date: 2021-07-19T10:23:23-07:00
New Revision: 03cdb5221d120c4000725e4aa862ef8c8b852274

URL: https://github.com/llvm/llvm-project/commit/03cdb5221d120c4000725e4aa862ef8c8b852274
DIFF: https://github.com/llvm/llvm-project/commit/03cdb5221d120c4000725e4aa862ef8c8b852274.diff

LOG: [GlobalISel] Fix load-or combine moving loads across potential aliasing stores.

Although this combine checks that there are no load-folding barriers
between the loads that it's trying to merge, it was inserting the merged
load at the MIRBuilder's default insertion point, which is the G_OR use
instruction.

This was causing a miscompile in the test suite's
SingleSource/Regression/C/gcc-c-torture/execute/GCC-C-execute-bswap-2
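
Roughly, the failing pattern looks like this (a minimal MIR sketch with
illustrative register names, modeled on the test added below):

    %lo:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    %hi:_(s32) = G_ZEXTLOAD %ptr1(p0) :: (load (s8))
    G_STORE %val(s8), %other(p0) :: (store (s8))
    %shifted:_(s32) = G_SHL %hi, %eight(s32)
    %full:_(s32) = G_OR %shifted, %lo

Building the wide G_LOAD at the G_OR places it after the G_STORE, which
moves the load across a potentially aliasing store. The fix records the
latest load in the pattern and points the MIRBuilder at it before
building the replacement load.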

Differential Revision: https://reviews.llvm.org/D106251

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 0ce9d8e5299e1..775563ab2d47f 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -596,8 +596,10 @@ class CombinerHelper {
   /// at to the index of the load.
   /// \param [in] MemSizeInBits - The number of bits each load should produce.
   ///
-  /// \returns The lowest-index load found and the lowest index on success.
-  Optional<std::pair<GZExtLoad *, int64_t>> findLoadOffsetsForLoadOrCombine(
+  /// \returns On success, a 3-tuple containing the lowest-index load found,
+  /// the lowest index, and the last load in the sequence.
+  Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
+  findLoadOffsetsForLoadOrCombine(
       SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
       const SmallVector<Register, 8> &RegsToVisit,
       const unsigned MemSizeInBits);

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index e6ca02e8bd03d..590a65aa14fc7 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
+#include <tuple>
 
 #define DEBUG_TYPE "gi-combiner"
 
@@ -3455,7 +3456,7 @@ matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
   return std::make_pair(Load, Shift / MemSizeInBits);
 }
 
-Optional<std::pair<GZExtLoad *, int64_t>>
+Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
 CombinerHelper::findLoadOffsetsForLoadOrCombine(
     SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
     const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
@@ -3478,10 +3479,10 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
   const MachineMemOperand *MMO = nullptr;
 
   // Earliest instruction-order load in the pattern.
-  MachineInstr *EarliestLoad = nullptr;
+  GZExtLoad *EarliestLoad = nullptr;
 
   // Latest instruction-order load in the pattern.
-  MachineInstr *LatestLoad = nullptr;
+  GZExtLoad *LatestLoad = nullptr;
 
   // Base pointer which every load should share.
   Register BasePtr;
@@ -3586,7 +3587,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
       return None;
   }
 
-  return std::make_pair(LowestIdxLoad, LowestIdx);
+  return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
 }
 
 bool CombinerHelper::matchLoadOrCombine(
@@ -3634,13 +3635,13 @@ bool CombinerHelper::matchLoadOrCombine(
   // Also verify that each of these ends up putting a[i] into the same memory
   // offset as a load into a wide type would.
   SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
-  GZExtLoad *LowestIdxLoad;
+  GZExtLoad *LowestIdxLoad, *LatestLoad;
   int64_t LowestIdx;
   auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
       MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
   if (!MaybeLoadInfo)
     return false;
-  std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;
+  std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
 
   // We have a bunch of loads being OR'd together. Using the addresses + offsets
   // we found before, check if this corresponds to a big or little endian byte
@@ -3695,6 +3696,7 @@ bool CombinerHelper::matchLoadOrCombine(
     return false;
 
   MatchInfo = [=](MachineIRBuilder &MIB) {
+    MIB.setInstrAndDebugLoc(*LatestLoad);
     Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
     MIB.buildLoad(LoadDst, Ptr, *NewMMO);
     if (NeedsBSwap)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
index 4b02fe205e5a0..428def14dc78e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
@@ -1397,9 +1397,6 @@ body:             |
name:            dont_combine_store_between_different_mbb
 tracksRegLiveness: true
 body:             |
-  ; There is a store between the two loads, hidden away in a different MBB.
-  ; We should not combine here.
-
  ; LITTLE-LABEL: name: dont_combine_store_between_different_mbb
   ; LITTLE: bb.0:
   ; LITTLE:   successors: %bb.1(0x80000000)
@@ -1444,6 +1441,9 @@ body:             |
   ; BIG:   %full_load:_(s32) = G_OR %low_half, %high_half
   ; BIG:   $w1 = COPY %full_load(s32)
   ; BIG:   RET_ReallyLR implicit $w1
+  ; There is a store between the two loads, hidden away in a different MBB.
+  ; We should not combine here.
+
 
   bb.0:
     successors: %bb.1(0x80000000)
@@ -1479,8 +1479,6 @@ body:             |
name:            different_mbb
 tracksRegLiveness: true
 body:             |
-  ; It should be possible to combine here, but it's not supported right now.
-
  ; LITTLE-LABEL: name: different_mbb
   ; LITTLE: bb.0:
   ; LITTLE:   successors: %bb.1(0x80000000)
@@ -1513,6 +1511,8 @@ body:             |
   ; BIG:   %full_load:_(s32) = G_OR %low_half, %high_half
   ; BIG:   $w1 = COPY %full_load(s32)
   ; BIG:   RET_ReallyLR implicit $w1
+  ; It should be possible to combine here, but it's not supported right now.
+
 
   bb.0:
     successors: %bb.1(0x80000000)
@@ -1569,3 +1569,67 @@ body:             |
     %full_load:_(s32) = G_OR %low_half, %high_half
     $w1 = COPY %full_load(s32)
     RET_ReallyLR implicit $w1
+
+...
+---
+name:            store_between_loads_and_or
+alignment:       4
+tracksRegLiveness: true
+
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; Check that we build the G_LOAD at the point of the last load, instead of at the G_OR.
+    ; We could have a G_STORE in between, across which it may not be safe to move the load.
+  liveins: $x0, $x1
+    ; LITTLE-LABEL: name: store_between_loads_and_or
+    ; LITTLE: liveins: $x0, $x1, $x0, $x1
+    ; LITTLE: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; LITTLE: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; LITTLE: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+    ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
+    ; LITTLE: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
+    ; LITTLE: $w0 = COPY [[LOAD]](s32)
+    ; LITTLE: RET_ReallyLR implicit $w0
+    ; BIG-LABEL: name: store_between_loads_and_or
+    ; BIG: liveins: $x0, $x1, $x0, $x1
+    ; BIG: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; BIG: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; BIG: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
+    ; BIG: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[LOAD]]
+    ; BIG: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
+    ; BIG: $w0 = COPY [[BSWAP]](s32)
+    ; BIG: RET_ReallyLR implicit $w0
+  %0:_(p0) = COPY $x0
+  %1:_(p0) = COPY $x1
+  %12:_(s8) = G_CONSTANT i8 1
+  %15:_(s32) = G_CONSTANT i32 8
+  %19:_(s32) = G_CONSTANT i32 16
+  %23:_(s32) = G_CONSTANT i32 24
+  %13:_(s32) = G_ZEXTLOAD %0:_(p0) :: (load (s8))
+  %3:_(s64) = G_CONSTANT i64 1
+  %4:_(p0) = G_PTR_ADD %0:_, %3:_(s64)
+  %14:_(s32) = G_ZEXTLOAD %4:_(p0) :: (load (s8))
+  %6:_(s64) = G_CONSTANT i64 2
+  %7:_(p0) = G_PTR_ADD %0:_, %6:_(s64)
+  %18:_(s32) = G_ZEXTLOAD %7:_(p0) :: (load (s8))
+  %9:_(s64) = G_CONSTANT i64 3
+  %10:_(p0) = G_PTR_ADD %0:_, %9:_(s64)
+  %22:_(s32) = G_ZEXTLOAD %10:_(p0) :: (load (s8))
+  G_STORE %12:_(s8), %1:_(p0) :: (store (s8))
+  %16:_(s32) = nuw nsw G_SHL %14:_, %15:_(s32)
+  %17:_(s32) = G_OR %16:_, %13:_
+  %20:_(s32) = nuw nsw G_SHL %18:_, %19:_(s32)
+  %21:_(s32) = G_OR %17:_, %20:_
+  %24:_(s32) = nuw G_SHL %22:_, %23:_(s32)
+  %25:_(s32) = G_OR %21:_, %24:_
+  $w0 = COPY %25:_(s32)
+  RET_ReallyLR implicit $w0
+
+...
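
With the builder repositioned via MIB.setInstrAndDebugLoc(*LatestLoad),
the merged load for the sketch above is emitted where the last narrow
load was, ahead of the store (again with illustrative names; the test's
LITTLE check lines show the actual output):

    %full:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
    G_STORE %val(s8), %other(p0) :: (store (s8))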




More information about the llvm-commits mailing list