[PATCH] D21190: [InstCombine] allow more than one use for vector cast folding with selects
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 9 10:50:57 PDT 2016
spatel created this revision.
spatel added reviewers: majnemer, eli.friedman, RKSimon.
spatel added a subscriber: llvm-commits.
Herald added a subscriber: mcrosier.
The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops:
define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) {
%cmp = fcmp olt <4 x float> %a, %b
%bc1 = bitcast <4 x float> %a to <4 x i32>
%bc2 = bitcast <4 x float> %b to <4 x i32>
%sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2
%sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1
%bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>*
store <4 x i32> %sel1, <4 x i32>* %bc3
%bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>*
store <4 x i32> %sel2, <4 x i32>* %bc4
ret void
}
With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts:
define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) {
%cmp = fcmp olt <4 x float> %a, %b
%sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
%sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a
store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16
store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16
ret void
}
The asm for x86 SSE then improves from:
movaps %xmm0, %xmm2
cmpltps %xmm1, %xmm2
movaps %xmm2, %xmm3
andnps %xmm1, %xmm3
movaps %xmm2, %xmm4
andnps %xmm0, %xmm4
andps %xmm2, %xmm0
orps %xmm3, %xmm0
andps %xmm1, %xmm2
orps %xmm4, %xmm2
movaps %xmm0, (%rdi)
movaps %xmm2, (%rsi)
To:
movaps %xmm0, %xmm2
minps %xmm1, %xmm2
maxps %xmm0, %xmm1
movaps %xmm2, (%rdi)
movaps %xmm1, (%rsi)
http://reviews.llvm.org/D21190
Files:
lib/Transforms/InstCombine/InstCombineSelect.cpp
test/Transforms/InstCombine/select.ll
Index: test/Transforms/InstCombine/select.ll
===================================================================
--- test/Transforms/InstCombine/select.ll
+++ test/Transforms/InstCombine/select.ll
@@ -723,6 +723,30 @@
ret i48 %tmp2
}
+; Allow select promotion even if there are multiple uses of casted ops.
+; Hoisting the selects allows later pattern matching to see that these are min/max ops.
+
+define void @min_max_bitcast(<4 x float> %a, <4 x float> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) {
+; CHECK-LABEL: @min_max_bitcast(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> %a, %b
+; CHECK-NEXT: [[SEL1_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> %a, <4 x float> %b
+; CHECK-NEXT: [[SEL2_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32>* %ptr1 to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[SEL1_V]], <4 x float>* [[TMP1]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32>* %ptr2 to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[SEL2_V]], <4 x float>* [[TMP2]], align 16
+; CHECK-NEXT: ret void
+;
+ %cmp = fcmp olt <4 x float> %a, %b
+ %bc1 = bitcast <4 x float> %a to <4 x i32>
+ %bc2 = bitcast <4 x float> %b to <4 x i32>
+ %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2
+ %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1
+ store <4 x i32> %sel1, <4 x i32>* %ptr1
+ store <4 x i32> %sel2, <4 x i32>* %ptr2
+ ret void
+}
+
; PR8575
define i32 @test52(i32 %n, i32 %m) nounwind {
Index: lib/Transforms/InstCombine/InstCombineSelect.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -116,8 +116,7 @@
}
}
-/// Here we have (select c, TI, FI), and we know that TI and FI
-/// have the same opcode and only one use each. Try to simplify this.
+/// We have (select c, TI, FI), and we know that TI and FI have the same opcode.
Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
Instruction *FI) {
// If this is a cast from the same type, merge.
@@ -129,10 +128,17 @@
// The select condition may be a vector. We may only change the operand
// type if the vector width remains the same (and matches the condition).
Type *CondTy = SI.getCondition()->getType();
- if (CondTy->isVectorTy() &&
- (!FIOpndTy->isVectorTy() ||
- CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()))
+ if (CondTy->isVectorTy()) {
+ if (!FIOpndTy->isVectorTy())
+ return nullptr;
+ if (CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())
+ return nullptr;
+ } else if (!TI->hasOneUse() || !FI->hasOneUse()) {
+ // TODO: The one-use restrictions for a scalar select could be eased if
+ // the fold of a select in visitLoadInst() was enhanced to match a pattern
+ // that includes a cast.
return nullptr;
+ }
// Fold this by inserting a select from the input values.
Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
@@ -142,7 +148,7 @@
}
// Only handle binary operators here.
- if (!isa<BinaryOperator>(TI))
+ if (!isa<BinaryOperator>(TI) || !TI->hasOneUse() || !FI->hasOneUse())
return nullptr;
// Figure out if the operations have any operands in common.
@@ -1056,14 +1062,12 @@
if (Instruction *Add = foldAddSubSelect(SI, *Builder))
return Add;
+ // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
auto *TI = dyn_cast<Instruction>(TrueVal);
auto *FI = dyn_cast<Instruction>(FalseVal);
- if (TI && FI && TI->hasOneUse() && FI->hasOneUse()) {
- // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
- if (TI->getOpcode() == FI->getOpcode())
- if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
- return IV;
- }
+ if (TI && FI && TI->getOpcode() == FI->getOpcode())
+ if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
+ return IV;
// See if we can fold the select into one of our operands.
if (SI.getType()->isIntOrIntVectorTy() || SI.getType()->isFPOrFPVectorTy()) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D21190.60197.patch
Type: text/x-patch
Size: 4263 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160609/cda37952/attachment.bin>
More information about the llvm-commits
mailing list