[llvm] r270142 - [ARM, AArch64] Match additional patterns to ldN instructions

Matthew Simpson via llvm-commits llvm-commits at lists.llvm.org
Thu May 19 14:39:00 PDT 2016


Author: mssimpso
Date: Thu May 19 16:39:00 2016
New Revision: 270142

URL: http://llvm.org/viewvc/llvm-project?rev=270142&view=rev
Log:
[ARM, AArch64] Match additional patterns to ldN instructions

When matching an interleaved load to an ldN pattern, the interleaved access
pass checks that all users of the load are shuffles. If the load is used by an
instruction other than a shuffle, the pass gives up and an ldN is not
generated. This patch considers users of the load that are extractelement
instructions. It attempts to modify the extracts to use one of the available
shuffles rather than the load. After the transformation, the load is only used
by shuffles and will then be matched with an ldN pattern.

Differential Revision: http://reviews.llvm.org/D20250

Added:
    llvm/trunk/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll
    llvm/trunk/test/CodeGen/ARM/arm-interleaved-accesses-extract-user.ll
Modified:
    llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp
    llvm/trunk/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
    llvm/trunk/test/CodeGen/ARM/arm-interleaved-accesses.ll

Modified: llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp?rev=270142&r1=270141&r2=270142&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp (original)
+++ llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp Thu May 19 16:39:00 2016
@@ -40,6 +40,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
@@ -65,7 +66,7 @@ class InterleavedAccess : public Functio
 public:
   static char ID;
   InterleavedAccess(const TargetMachine *TM = nullptr)
-      : FunctionPass(ID), TM(TM), TLI(nullptr) {
+      : FunctionPass(ID), DT(nullptr), TM(TM), TLI(nullptr) {
     initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
   }
 
@@ -73,7 +74,13 @@ public:
 
   bool runOnFunction(Function &F) override;
 
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+  }
+
 private:
+  DominatorTree *DT;
   const TargetMachine *TM;
   const TargetLowering *TLI;
 
@@ -84,13 +91,26 @@ private:
   /// \brief Transform an interleaved store into target specific intrinsics.
   bool lowerInterleavedStore(StoreInst *SI,
                              SmallVector<Instruction *, 32> &DeadInsts);
+
+  /// \brief Returns true if the uses of an interleaved load by the
+  /// extractelement instructions in \p Extracts can be replaced by uses of the
+  /// shufflevector instructions in \p Shuffles instead. If so, the necessary
+  /// replacements are also performed.
+  bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
+                          ArrayRef<ShuffleVectorInst *> Shuffles);
 };
 } // end anonymous namespace.
 
 char InterleavedAccess::ID = 0;
-INITIALIZE_TM_PASS(InterleavedAccess, "interleaved-access",
-    "Lower interleaved memory accesses to target specific intrinsics",
-    false, false)
+INITIALIZE_TM_PASS_BEGIN(
+    InterleavedAccess, "interleaved-access",
+    "Lower interleaved memory accesses to target specific intrinsics", false,
+    false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_TM_PASS_END(
+    InterleavedAccess, "interleaved-access",
+    "Lower interleaved memory accesses to target specific intrinsics", false,
+    false)
 
 FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) {
   return new InterleavedAccess(TM);
@@ -179,9 +199,18 @@ bool InterleavedAccess::lowerInterleaved
     return false;
 
   SmallVector<ShuffleVectorInst *, 4> Shuffles;
+  SmallVector<ExtractElementInst *, 4> Extracts;
 
-  // Check if all users of this load are shufflevectors.
+  // Check if all users of this load are shufflevectors. If we encounter any
+  // users that are extractelement instructions, we save them to later check if
+  // they can be modified to extract from one of the shufflevectors instead of
+  // the load.
   for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) {
+    auto *Extract = dyn_cast<ExtractElementInst>(*UI);
+    if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
+      Extracts.push_back(Extract);
+      continue;
+    }
     ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI);
     if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
       return false;
@@ -217,6 +246,11 @@ bool InterleavedAccess::lowerInterleaved
     Indices.push_back(Index);
   }
 
+  // Try and modify users of the load that are extractelement instructions to
+  // use the shufflevector instructions instead of the load.
+  if (!tryReplaceExtracts(Extracts, Shuffles))
+    return false;
+
   DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
 
   // Try to create target specific intrinsics to replace the load and shuffles.
@@ -230,6 +264,73 @@ bool InterleavedAccess::lowerInterleaved
   return true;
 }
 
+bool InterleavedAccess::tryReplaceExtracts(
+    ArrayRef<ExtractElementInst *> Extracts,
+    ArrayRef<ShuffleVectorInst *> Shuffles) {
+
+  // If there aren't any extractelement instructions to modify, there's nothing
+  // to do.
+  if (Extracts.empty())
+    return true;
+
+  // Maps extractelement instructions to vector-index pairs. The extractelement
+  // instructions will be modified to use the new vector and index operands.
+  DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;
+
+  for (auto *Extract : Extracts) {
+
+    // The vector index that is extracted.
+    auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
+    auto Index = IndexOperand->getSExtValue();
+
+    // Look for a suitable shufflevector instruction. The goal is to modify the
+    // extractelement instruction (which uses an interleaved load) to use one
+    // of the shufflevector instructions instead of the load.
+    for (auto *Shuffle : Shuffles) {
+
+      // If the shufflevector instruction doesn't dominate the extract, we
+      // can't create a use of it.
+      if (!DT->dominates(Shuffle, Extract))
+        continue;
+
+      // Inspect the indices of the shufflevector instruction. If the shuffle
+      // selects the same index that is extracted, we can modify the
+      // extractelement instruction.
+      SmallVector<int, 4> Indices;
+      Shuffle->getShuffleMask(Indices);
+      for (unsigned I = 0; I < Indices.size(); ++I)
+        if (Indices[I] == Index) {
+          assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
+                 "Vector operations do not match");
+          ReplacementMap[Extract] = std::make_pair(Shuffle, I);
+          break;
+        }
+
+      // If we found a suitable shufflevector instruction, stop looking.
+      if (ReplacementMap.count(Extract))
+        break;
+    }
+
+    // If we did not find a suitable shufflevector instruction, the
+    // extractelement instruction cannot be modified, so we must give up.
+    if (!ReplacementMap.count(Extract))
+      return false;
+  }
+
+  // Finally, perform the replacements.
+  IRBuilder<> Builder(Extracts[0]->getContext());
+  for (auto &Replacement : ReplacementMap) {
+    auto *Extract = Replacement.first;
+    auto *Vector = Replacement.second.first;
+    auto Index = Replacement.second.second;
+    Builder.SetInsertPoint(Extract);
+    Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
+    Extract->eraseFromParent();
+  }
+
+  return true;
+}
+
 bool InterleavedAccess::lowerInterleavedStore(
     StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
   if (!SI->isSimple())
@@ -262,6 +363,7 @@ bool InterleavedAccess::runOnFunction(Fu
 
   DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
 
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   TLI = TM->getSubtargetImpl(F)->getTargetLowering();
   MaxFactor = TLI->getMaxSupportedInterleaveFactor();
 

Added: llvm/trunk/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll?rev=270142&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll Thu May 19 16:39:00 2016
@@ -0,0 +1,86 @@
+; RUN: opt < %s -mtriple=aarch64 -interleaved-access -S | FileCheck %s
+
+; CHECK-LABEL: @extract_user_basic(
+; CHECK: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
+; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %ldN, 0
+; CHECK: extractelement <4 x i32> %[[R]], i64 1
+define void @extract_user_basic(<8 x i32>* %A, i1 %C) {
+entry:
+  %L = load <8 x i32>, <8 x i32>* %A, align 8
+  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  br i1 %C, label %if.then, label %if.merge
+
+if.then:
+  %E = extractelement <8 x i32> %L, i32 2
+  br label %if.merge
+
+if.merge:
+  ret void
+}
+
+; CHECK-LABEL: @extract_user_multi(
+; CHECK: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
+; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %ldN, 0
+; CHECK: extractelement <4 x i32> %[[R]], i64 0
+; CHECK: extractelement <4 x i32> %[[R]], i64 1
+define void @extract_user_multi(<8 x i32>* %A, i1 %C) {
+entry:
+  %L = load <8 x i32>, <8 x i32>* %A, align 8
+  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  br i1 %C, label %if.then, label %if.merge
+
+if.then:
+  %E1 = extractelement <8 x i32> %L, i32 0
+  br label %if.merge
+
+if.merge:
+  %E2 = extractelement <8 x i32> %L, i32 2
+  ret void
+}
+
+; CHECK-LABEL: @extract_user_multi_no_dom(
+; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
+define void @extract_user_multi_no_dom(<8 x i32>* %A, i1 %C) {
+entry:
+  %L = load <8 x i32>, <8 x i32>* %A, align 8
+  %E1 = extractelement <8 x i32> %L, i32 0
+  br i1 %C, label %if.then, label %if.merge
+
+if.then:
+  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %E2 = extractelement <8 x i32> %L, i32 2
+  br label %if.merge
+
+if.merge:
+  ret void
+}
+
+; CHECK-LABEL: @extract_user_wrong_const_index(
+; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
+define void @extract_user_wrong_const_index(<8 x i32>* %A) {
+entry:
+  %L = load <8 x i32>, <8 x i32>* %A, align 8
+  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %E = extractelement <8 x i32> %L, i32 1
+  ret void
+}
+
+; CHECK-LABEL: @extract_user_undef_index(
+; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
+define void @extract_user_undef_index(<8 x i32>* %A) {
+entry:
+  %L = load <8 x i32>, <8 x i32>* %A, align 8
+  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %E = extractelement <8 x i32> %L, i32 undef
+  ret void
+}
+
+; CHECK-LABEL: @extract_user_var_index(
+; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
+define void @extract_user_var_index(<8 x i32>* %A, i32 %I) {
+entry:
+  %L = load <8 x i32>, <8 x i32>* %A, align 8
+  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %E = extractelement <8 x i32> %L, i32 %I
+  ret void
+}

Modified: llvm/trunk/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll?rev=270142&r1=270141&r2=270142&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll Thu May 19 16:39:00 2016
@@ -268,3 +268,15 @@ define void @store_illegal_factor2(<3 x
   store <3 x float> %tmp1, <3 x float>* %p, align 16
   ret void
 }
+
+; NEON-LABEL: load_factor2_with_extract_user:
+; NEON: ld2 { v0.4s, v1.4s }, [x0]
+; NEON: mov w0, v0.s[1]
+; NONEON-LABEL: load_factor2_with_extract_user:
+; NONEON-NOT: ld2
+define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
+  %1 = load <8 x i32>, <8 x i32>* %a, align 8
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %3 = extractelement <8 x i32> %1, i32 2
+  ret i32 %3
+}

Added: llvm/trunk/test/CodeGen/ARM/arm-interleaved-accesses-extract-user.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/arm-interleaved-accesses-extract-user.ll?rev=270142&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/arm-interleaved-accesses-extract-user.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/arm-interleaved-accesses-extract-user.ll Thu May 19 16:39:00 2016
@@ -0,0 +1,86 @@
+; RUN: opt < %s -mtriple=arm-eabi -mattr=+neon -interleaved-access -S | FileCheck %s
+
+; CHECK-LABEL: @extract_user_basic(
+; CHECK: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
+; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %vldN, 0
+; CHECK: extractelement <4 x i32> %[[R]], i64 1
+define void @extract_user_basic(<8 x i32>* %A, i1 %C) {
+entry:
+  %L = load <8 x i32>, <8 x i32>* %A, align 8
+  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  br i1 %C, label %if.then, label %if.merge
+
+if.then:
+  %E = extractelement <8 x i32> %L, i32 2
+  br label %if.merge
+
+if.merge:
+  ret void
+}
+
+; CHECK-LABEL: @extract_user_multi(
+; CHECK: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
+; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %vldN, 0
+; CHECK: extractelement <4 x i32> %[[R]], i64 0
+; CHECK: extractelement <4 x i32> %[[R]], i64 1
+define void @extract_user_multi(<8 x i32>* %A, i1 %C) {
+entry:
+  %L = load <8 x i32>, <8 x i32>* %A, align 8
+  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  br i1 %C, label %if.then, label %if.merge
+
+if.then:
+  %E1 = extractelement <8 x i32> %L, i32 0
+  br label %if.merge
+
+if.merge:
+  %E2 = extractelement <8 x i32> %L, i32 2
+  ret void
+}
+
+; CHECK-LABEL: @extract_user_multi_no_dom(
+; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
+define void @extract_user_multi_no_dom(<8 x i32>* %A, i1 %C) {
+entry:
+  %L = load <8 x i32>, <8 x i32>* %A, align 8
+  %E1 = extractelement <8 x i32> %L, i32 0
+  br i1 %C, label %if.then, label %if.merge
+
+if.then:
+  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %E2 = extractelement <8 x i32> %L, i32 2
+  br label %if.merge
+
+if.merge:
+  ret void
+}
+
+; CHECK-LABEL: @extract_user_wrong_const_index(
+; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
+define void @extract_user_wrong_const_index(<8 x i32>* %A) {
+entry:
+  %L = load <8 x i32>, <8 x i32>* %A, align 8
+  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %E = extractelement <8 x i32> %L, i32 1
+  ret void
+}
+
+; CHECK-LABEL: @extract_user_undef_index(
+; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
+define void @extract_user_undef_index(<8 x i32>* %A) {
+entry:
+  %L = load <8 x i32>, <8 x i32>* %A, align 8
+  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %E = extractelement <8 x i32> %L, i32 undef
+  ret void
+}
+
+; CHECK-LABEL: @extract_user_var_index(
+; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
+define void @extract_user_var_index(<8 x i32>* %A, i32 %I) {
+entry:
+  %L = load <8 x i32>, <8 x i32>* %A, align 8
+  %S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %E = extractelement <8 x i32> %L, i32 %I
+  ret void
+}

Modified: llvm/trunk/test/CodeGen/ARM/arm-interleaved-accesses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/arm-interleaved-accesses.ll?rev=270142&r1=270141&r2=270142&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/arm-interleaved-accesses.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/arm-interleaved-accesses.ll Thu May 19 16:39:00 2016
@@ -304,3 +304,15 @@ define void @store_illegal_factor2(<3 x
   store <3 x float> %tmp1, <3 x float>* %p, align 16
   ret void
 }
+
+; NEON-LABEL: load_factor2_with_extract_user:
+; NEON: vld2.32 {d16, d17, d18, d19}, [r0:64]
+; NEON: vmov.32 r0, d16[1]
+; NONEON-LABEL: load_factor2_with_extract_user:
+; NONEON-NOT: vld2
+define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
+  %1 = load <8 x i32>, <8 x i32>* %a, align 8
+  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %3 = extractelement <8 x i32> %1, i32 2
+  ret i32 %3
+}




More information about the llvm-commits mailing list