[llvm] r335349 - [SLPVectorizer] Relax alternate opcodes to accept any BinaryOperator pair
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 22 07:04:06 PDT 2018
Author: rksimon
Date: Fri Jun 22 07:04:06 2018
New Revision: 335349
URL: http://llvm.org/viewvc/llvm-project?rev=335349&view=rev
Log:
[SLPVectorizer] Relax alternate opcodes to accept any BinaryOperator pair
Before this patch, SLP only accepted (F)Add/(F)Sub counterpart opcodes as alternates to be merged into an alternate shuffle.
This patch relaxes that restriction to accept any pair of BinaryOperator opcodes instead, provided the target's cost model accepts the vectorization+shuffle.
Differential Revision: https://reviews.llvm.org/D48477
Modified:
llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/trunk/test/Transforms/SLPVectorizer/AArch64/transpose.ll
Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=335349&r1=335348&r2=335349&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Fri Jun 22 07:04:06 2018
@@ -299,23 +299,6 @@ isShuffle(ArrayRef<Value *> VL) {
: TargetTransformInfo::SK_PermuteSingleSrc;
}
-///\returns Opcode that can be clubbed with \p Op to create an alternate
-/// sequence which can later be merged as a ShuffleVector instruction.
-static unsigned getAltOpcode(unsigned Op) {
- switch (Op) {
- case Instruction::FAdd:
- return Instruction::FSub;
- case Instruction::FSub:
- return Instruction::FAdd;
- case Instruction::Add:
- return Instruction::Sub;
- case Instruction::Sub:
- return Instruction::Add;
- default:
- return 0;
- }
-}
-
static bool sameOpcodeOrAlt(unsigned Opcode, unsigned AltOpcode,
unsigned CheckedOpcode) {
return Opcode == CheckedOpcode || AltOpcode == CheckedOpcode;
@@ -361,19 +344,20 @@ static InstructionsState getSameOpcode(A
if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
return InstructionsState(VL[BaseIndex], 0, 0);
+ bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
unsigned AltOpcode = Opcode;
- bool HasAltOpcodes = llvm::any_of(VL, [Opcode](Value *V) {
- return Opcode != cast<Instruction>(V)->getOpcode();
- });
-
- // Check for an alternate opcode pattern.
- if (HasAltOpcodes) {
- AltOpcode = getAltOpcode(Opcode);
- for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
- unsigned InstOpcode = cast<Instruction>(VL[Cnt])->getOpcode();
- if (!sameOpcodeOrAlt(Opcode, AltOpcode, InstOpcode))
- return InstructionsState(VL[BaseIndex], 0, 0);
+
+ // Check for one alternate opcode from another BinaryOperator.
+ // TODO - can we support other operators (casts etc.)?
+ for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
+ unsigned InstOpcode = cast<Instruction>(VL[Cnt])->getOpcode();
+ if (!sameOpcodeOrAlt(Opcode, AltOpcode, InstOpcode)) {
+ if (Opcode == AltOpcode && IsBinOp && isa<BinaryOperator>(VL[Cnt])) {
+ AltOpcode = InstOpcode;
+ continue;
+ }
+ return InstructionsState(VL[BaseIndex], 0, 0);
}
}
Modified: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/transpose.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/transpose.ll?rev=335349&r1=335348&r2=335349&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/transpose.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/transpose.ll Fri Jun 22 07:04:06 2018
@@ -197,25 +197,21 @@ define <4 x i32> @build_vec_v4i32_reuse_
define <4 x i32> @build_vec_v4i32_3_binops(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-LABEL: @build_vec_v4i32_3_binops(
-; CHECK-NEXT: [[V0_0:%.*]] = extractelement <2 x i32> %v0, i32 0
-; CHECK-NEXT: [[V0_1:%.*]] = extractelement <2 x i32> %v0, i32 1
-; CHECK-NEXT: [[V1_0:%.*]] = extractelement <2 x i32> %v1, i32 0
-; CHECK-NEXT: [[V1_1:%.*]] = extractelement <2 x i32> %v1, i32 1
-; CHECK-NEXT: [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V1_0]]
-; CHECK-NEXT: [[TMP0_1:%.*]] = add i32 [[V0_1]], [[V1_1]]
-; CHECK-NEXT: [[TMP1_0:%.*]] = mul i32 [[V0_0]], [[V1_0]]
-; CHECK-NEXT: [[TMP1_1:%.*]] = mul i32 [[V0_1]], [[V1_1]]
-; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> %v0, %v1
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> %v0, %v1
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[TMP0_0]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP1_0]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP0_1]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP1_1]], i32 1
-; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP3_3:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> %v0, <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> %v0, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = mul <2 x i32> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i32> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i32> [[TMP5]], [[TMP10]]
+; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i32> [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP3_3:%.*]] = shufflevector <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x i32> [[TMP3_3]]
;
%v0.0 = extractelement <2 x i32> %v0, i32 0
More information about the llvm-commits mailing list