[llvm] ed936aa - [InterleavedAccess] Return correct 'modified' status.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 4 07:59:16 PST 2021


Author: Florian Hahn
Date: 2021-01-04T15:49:47Z
New Revision: ed936aad7814404b3cc767d4515096f078dfcbb9

URL: https://github.com/llvm/llvm-project/commit/ed936aad7814404b3cc767d4515096f078dfcbb9
DIFF: https://github.com/llvm/llvm-project/commit/ed936aad7814404b3cc767d4515096f078dfcbb9.diff

LOG: [InterleavedAccess] Return correct 'modified' status.

Both tryReplaceExtracts and replaceBinOpShuffles may modify the IR, even
if no interleaved loads are generated, but currently the pass pretends
no changes were made.

This patch updates the pass to return true if either of the functions
made any changes. In the case of tryReplaceExtracts, changes have been made
if there are any Extracts and the function returned true.

`replaceBinOpShuffles` always makes changes if BinOpShuffles is not empty.
It also always returned true, so I went ahead and renamed it from
`tryReplaceBinOpShuffles` to just `replaceBinOpShuffles`.

Fixes PR48208.

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D93997

Added: 
    llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll

Modified: 
    llvm/lib/CodeGen/InterleavedAccessPass.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 73771609a792..6e1621450755 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -123,10 +123,11 @@ class InterleavedAccess : public FunctionPass {
   /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them
   /// to binop(shuffle(x), shuffle(y)) to allow the formation of an
   /// interleaving load. Any newly created shuffles that operate on \p LI will
-  /// be added to \p Shuffles.
-  bool tryReplaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
-                               SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
-                               LoadInst *LI);
+  /// be added to \p Shuffles. Returns true, if any changes to the IR have been
+  /// made.
+  bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
+                            SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
+                            LoadInst *LI);
 };
 
 } // end anonymous namespace.
@@ -369,14 +370,17 @@ bool InterleavedAccess::lowerInterleavedLoad(
   // use the shufflevector instructions instead of the load.
   if (!tryReplaceExtracts(Extracts, Shuffles))
     return false;
-  if (!tryReplaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI))
-    return false;
+
+  bool BinOpShuffleChanged =
+      replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
 
   LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
 
   // Try to create target specific intrinsics to replace the load and shuffles.
-  if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor))
-    return false;
+  if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
+    // If Extracts is not empty, tryReplaceExtracts made changes earlier.
+    return !Extracts.empty() || BinOpShuffleChanged;
+  }
 
   for (auto SVI : Shuffles)
     DeadInsts.push_back(SVI);
@@ -385,7 +389,7 @@ bool InterleavedAccess::lowerInterleavedLoad(
   return true;
 }
 
-bool InterleavedAccess::tryReplaceBinOpShuffles(
+bool InterleavedAccess::replaceBinOpShuffles(
     ArrayRef<ShuffleVectorInst *> BinOpShuffles,
     SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
   for (auto *SVI : BinOpShuffles) {
@@ -410,7 +414,8 @@ bool InterleavedAccess::tryReplaceBinOpShuffles(
     if (NewSVI2->getOperand(0) == LI)
       Shuffles.push_back(NewSVI2);
   }
-  return true;
+
+  return !BinOpShuffles.empty();
 }
 
 bool InterleavedAccess::tryReplaceExtracts(

diff  --git a/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll
new file mode 100644
index 000000000000..80f3195699dc
--- /dev/null
+++ b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -interleaved-access -S %s | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+; No interleaved load instruction is generated, but the shuffle is moved just
+; after the load.
+define <2 x double> @shuffle_binop_fol(<4 x double>* %ptr) {
+; CHECK-LABEL: @shuffle_binop_fol(
+; CHECK-NEXT:  vector.body.preheader:
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[EXTRACTED1:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[EXTRACTED2:%.*]] = shufflevector <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, <4 x double> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[FADD3:%.*]] = fadd <2 x double> [[EXTRACTED1]], [[EXTRACTED2]]
+; CHECK-NEXT:    ret <2 x double> [[FADD3]]
+;
+vector.body.preheader:
+  %wide.load = load <4 x double>, <4 x double>* %ptr, align 8
+  %fadd = fadd <4 x double> %wide.load, <double 1.0, double 1.0, double 1.0, double 1.0>
+  %extracted = shufflevector <4 x double> %fadd, <4 x double> undef, <2 x i32> <i32 0, i32 2>
+  ret <2 x double> %extracted
+}
+
+; No interleaved load instruction is generated, but the extractelement
+; instructions are updated to use the shuffle instead of the load.
+define void @shuffle_extract(<4 x double>* %ptr, i1 %c) {
+; CHECK-LABEL: @shuffle_extract(
+; CHECK-NEXT:  vector.body.preheader:
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[EXTRACTED:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_MERGE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[EXTRACTED]], i64 0
+; CHECK-NEXT:    call void @use(double [[TMP0]])
+; CHECK-NEXT:    br label [[IF_MERGE]]
+; CHECK:       if.merge:
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[EXTRACTED]], i64 1
+; CHECK-NEXT:    call void @use(double [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+vector.body.preheader:
+  %wide.load = load <4 x double>, <4 x double>* %ptr, align 8
+  %extracted = shufflevector <4 x double> %wide.load, <4 x double> undef, <2 x i32> <i32 0, i32 2>
+  br i1 %c, label %if.then, label %if.merge
+
+if.then:
+  %e0 = extractelement <4 x double> %wide.load, i32 0
+  call void @use(double %e0)
+  br label %if.merge
+
+if.merge:
+  %e1 = extractelement <4 x double> %wide.load, i32 2
+  call void @use(double %e1)
+  ret void
+}
+
+declare void @use(double)


        


More information about the llvm-commits mailing list