[llvm] [InstCombine] Optimistically allow multiple shufflevector uses in foldOpPhi (PR #114278)
Matthias Braun via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 7 15:05:43 PST 2024
https://github.com/MatzeB updated https://github.com/llvm/llvm-project/pull/114278
>From 42aba4bbb4503bbe0b54eafb6cef695f7df91c6c Mon Sep 17 00:00:00 2001
From: Matthias Braun <matze at braunis.de>
Date: Tue, 29 Oct 2024 16:38:30 -0700
Subject: [PATCH 1/2] Optimistically allow multiple uses in foldOpPhi for
shufflevector
We would like to optimize situations of the form:
```
loop:
%phi = phi zeroinitializer, %interleaved
%deinterleave_a = shufflevector %phi, poison ; pick half of the lanes
%deinterleave_b = shufflevector %phi, poison ; pick remaining lanes
...
%interleaved = shufflevector %a, %b ; interleave lanes of a+b
```
where the interleave and de-interleave patterns cancel each other out.
This could be handled by `foldOpIntoPhi`, but doing so requires proceeding
even though the `Phi` operation has two uses.
This extends `foldOpIntoPhi` to proceed with more than one use if all
uses are `shufflevector` instructions that are guaranteed to simplify for
all `Phi` predecessors.
---
.../InstCombine/InstructionCombining.cpp | 55 +++++++++++++-----
.../InstCombine/vec_shuffle-phi-multiuse.ll | 57 +++++++++++++++++++
2 files changed, 97 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/vec_shuffle-phi-multiuse.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 2a54390c0f1882..9b2074e481f54f 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1773,17 +1773,33 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
if (NumPHIValues == 0)
return nullptr;
- // We normally only transform phis with a single use. However, if a PHI has
- // multiple uses and they are all the same operation, we can fold *all* of the
- // uses into the PHI.
+ // We normally only transform phis with a single use.
+ bool AllUsesIdentical = false;
+ bool MultipleUses = false;
if (!PN->hasOneUse()) {
- // Walk the use list for the instruction, comparing them to I.
+ // Exceptions:
+ // - All uses are identical.
+ // - All uses are shufflevector instructions that fully simplify; this
+ // helps interleave -> phi -> 2x de-interleave+de patterns.
+ if (isa<ShuffleVectorInst>(I)) {
+ MultipleUses = true;
+ }
+ AllUsesIdentical = true;
+ unsigned NumUses = 0;
for (User *U : PN->users()) {
+ ++NumUses;
Instruction *UI = cast<Instruction>(U);
- if (UI != &I && !I.isIdenticalTo(UI))
+ if (UI == &I)
+ continue;
+
+ if (!I.isIdenticalTo(UI))
+ AllUsesIdentical = false;
+ // Only inspect first 4 uses to avoid quadratic complexity.
+ if (!isa<ShuffleVectorInst>(UI) || NumUses > 4)
+ MultipleUses = false;
+ if (!AllUsesIdentical && !MultipleUses)
return nullptr;
}
- // Otherwise, we can replace *all* users with the new PHI we form.
}
// Check that all operands are phi-translatable.
@@ -1834,6 +1850,11 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
continue;
}
+ // Be conservative in MultipleUses case and do not allow non-simplified
+ // vals.
+ if (MultipleUses)
+ return nullptr;
+
if (SeenNonSimplifiedInVal)
return nullptr; // More than one non-simplified value.
SeenNonSimplifiedInVal = true;
@@ -1895,17 +1916,21 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
for (unsigned i = 0; i != NumPHIValues; ++i)
NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
- for (User *U : make_early_inc_range(PN->users())) {
- Instruction *User = cast<Instruction>(U);
- if (User == &I)
- continue;
- replaceInstUsesWith(*User, NewPN);
- eraseInstFromFunction(*User);
+ if (AllUsesIdentical) {
+ for (User *U : make_early_inc_range(PN->users())) {
+ Instruction *User = cast<Instruction>(U);
+ if (User == &I)
+ continue;
+ replaceInstUsesWith(*User, NewPN);
+ eraseInstFromFunction(*User);
+ }
}
- replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
- const_cast<PHINode &>(*NewPN),
- const_cast<PHINode &>(*PN), DT);
+ if (!MultipleUses || AllUsesIdentical) {
+ replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
+ const_cast<PHINode &>(*NewPN),
+ const_cast<PHINode &>(*PN), DT);
+ }
return replaceInstUsesWith(I, NewPN);
}
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle-phi-multiuse.ll b/llvm/test/Transforms/InstCombine/vec_shuffle-phi-multiuse.ll
new file mode 100644
index 00000000000000..597eae101e9f2d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle-phi-multiuse.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -passes=instcombine | FileCheck %s
+
+define void @f(ptr %p_begin, ptr %p_end, ptr %out) {
+; CHECK-LABEL: define void @f(
+; CHECK-SAME: ptr [[P_BEGIN:%.*]], ptr [[P_END:%.*]], ptr [[OUT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[ACC:%.*]] = phi <4 x float> [ zeroinitializer, %[[ENTRY]] ], [ [[SUM_LOWS:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[ODDS:%.*]] = phi <4 x float> [ zeroinitializer, %[[ENTRY]] ], [ [[SUM_HIGHS:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[P_BEGIN]], %[[ENTRY]] ], [ [[P_INC:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[VAL:%.*]] = load <4 x i8>, ptr [[P]], align 4
+; CHECK-NEXT: [[HIGHS:%.*]] = ashr <4 x i8> [[VAL]], <i8 4, i8 4, i8 4, i8 4>
+; CHECK-NEXT: [[LOWS:%.*]] = and <4 x i8> [[VAL]], <i8 15, i8 15, i8 15, i8 15>
+; CHECK-NEXT: [[HIGHS_F:%.*]] = sitofp <4 x i8> [[HIGHS]] to <4 x float>
+; CHECK-NEXT: [[LOWS_F:%.*]] = uitofp nneg <4 x i8> [[LOWS]] to <4 x float>
+; CHECK-NEXT: [[SUM_LOWS]] = fadd <4 x float> [[ACC]], [[LOWS_F]]
+; CHECK-NEXT: [[SUM_HIGHS]] = fadd <4 x float> [[ODDS]], [[HIGHS_F]]
+; CHECK-NEXT: [[P_INC]] = getelementptr inbounds i8, ptr [[P]], i64 4
+; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[P_INC]], [[P_END]]
+; CHECK-NEXT: br i1 [[C]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[INTERLEAVE:%.*]] = shufflevector <4 x float> [[SUM_LOWS]], <4 x float> [[SUM_HIGHS]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: store <8 x float> [[INTERLEAVE]], ptr [[OUT]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %acc = phi <8 x float> [ zeroinitializer, %entry ], [ %interleave, %loop ]
+ %p = phi ptr [%p_begin, %entry ], [%p_inc, %loop]
+
+ %evens = shufflevector <8 x float> %acc, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %odds = shufflevector <8 x float> %acc, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+
+ %val = load <4 x i8>, ptr %p, align 4
+ %highs = ashr <4 x i8> %val, <i8 4, i8 4, i8 4, i8 4>
+ %lows = and <4 x i8> %val, <i8 15, i8 15, i8 15, i8 15>
+
+ %highs_f = sitofp <4 x i8> %highs to <4 x float>
+ %lows_f = sitofp <4 x i8> %lows to <4 x float>
+
+ %sum_lows = fadd <4 x float> %evens, %lows_f
+ %sum_highs = fadd <4 x float> %odds, %highs_f
+
+ %interleave = shufflevector <4 x float> %sum_lows, <4 x float> %sum_highs, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+
+ %p_inc = getelementptr inbounds i8, ptr %p, i32 4
+ %c = icmp eq ptr %p_inc, %p_end
+ br i1 %c, label %exit, label %loop
+
+exit:
+ store <8 x float> %interleave, ptr %out, align 4
+ ret void
+}
>From 394d2df634fbb1f2d22d85308abb411d5d16c67f Mon Sep 17 00:00:00 2001
From: Matthias Braun <matze at braunis.de>
Date: Thu, 7 Nov 2024 14:57:37 -0800
Subject: [PATCH 2/2] Address review feedback
---
.../InstCombine/InstructionCombining.cpp | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9b2074e481f54f..714d13d4986066 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1775,15 +1775,13 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// We normally only transform phis with a single use.
bool AllUsesIdentical = false;
- bool MultipleUses = false;
+ bool MultipleShuffleVectorUses = false;
if (!PN->hasOneUse()) {
// Exceptions:
// - All uses are identical.
// - All uses are shufflevector instructions that fully simplify; this
// helps interleave -> phi -> 2x de-interleave+de patterns.
- if (isa<ShuffleVectorInst>(I)) {
- MultipleUses = true;
- }
+ MultipleShuffleVectorUses = isa<ShuffleVectorInst>(I);
AllUsesIdentical = true;
unsigned NumUses = 0;
for (User *U : PN->users()) {
@@ -1796,8 +1794,8 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
AllUsesIdentical = false;
// Only inspect first 4 uses to avoid quadratic complexity.
if (!isa<ShuffleVectorInst>(UI) || NumUses > 4)
- MultipleUses = false;
- if (!AllUsesIdentical && !MultipleUses)
+ MultipleShuffleVectorUses = false;
+ if (!AllUsesIdentical && !MultipleShuffleVectorUses)
return nullptr;
}
}
@@ -1850,9 +1848,9 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
continue;
}
- // Be conservative in MultipleUses case and do not allow non-simplified
- // vals.
- if (MultipleUses)
+ // Be conservative in cases with multiple uses and require all inputs to
+ // simplify.
+ if (MultipleShuffleVectorUses)
return nullptr;
if (SeenNonSimplifiedInVal)
@@ -1926,7 +1924,7 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
}
}
- if (!MultipleUses || AllUsesIdentical) {
+ if (!MultipleShuffleVectorUses || AllUsesIdentical) {
replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
const_cast<PHINode &>(*NewPN),
const_cast<PHINode &>(*PN), DT);
More information about the llvm-commits
mailing list