[llvm] 62dd44d - [VectorCombine] fix cost calc for extract-cmp

Sun Feb 16 07:48:39 PST 2020

Author: Sanjay Patel
Date: 2020-02-16T10:40:28-05:00
New Revision: 62dd44d76da9aa596fb199bda8b1e8768bb41033

URL: https://github.com/llvm/llvm-project/commit/62dd44d76da9aa596fb199bda8b1e8768bb41033
DIFF: https://github.com/llvm/llvm-project/commit/62dd44d76da9aa596fb199bda8b1e8768bb41033.diff

LOG: [VectorCombine] fix cost calc for extract-cmp

getOperationCost() does not return the cost we want here; it is not
the throughput cost that the rest of the calculation uses.

We may want to switch everything in this code to use the
getInstructionThroughput() wrapper to avoid these kinds of
problems, but I'll look at that as a follow-up because it
can create other logical diffs through its optional parameters
(we'd need to speculatively create the vector instruction to
make a fairer comparison).

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VectorCombine.cpp
    llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index f5a26d012de9..7b3697be0ae0 100644

--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -58,8 +58,9 @@ static bool foldExtractCmp(Instruction &I, const TargetTransformInfo &TTI) {
   // ((2 * extract) + scalar cmp) < (vector cmp + extract) ?
   int ExtractCost = TTI.getVectorInstrCost(Instruction::ExtractElement,
                                            VecTy, C->getZExtValue());
-  int ScalarCmpCost = TTI.getOperationCost(CmpOpcode, ScalarTy);
-  int VecCmpCost = TTI.getOperationCost(CmpOpcode, VecTy);
+  int ScalarCmpCost = TTI.getCmpSelInstrCost(CmpOpcode, ScalarTy, I.getType());
+  int VecCmpCost = TTI.getCmpSelInstrCost(CmpOpcode, VecTy,
+                                          CmpInst::makeCmpResultType(VecTy));
 
   int ScalarCost = 2 * ExtractCost + ScalarCmpCost;
   int VecCost = VecCmpCost + ExtractCost +

diff  --git a/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll b/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll
index d5d11df0ece0..8d04af3c8105 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- | FileCheck %s
+; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
 
 define i1 @cmp_v4i32(<4 x float> %arg, <4 x float> %arg1) {
 ; CHECK-LABEL: @cmp_v4i32(
@@ -57,18 +58,32 @@ bb18:
 }
 
 define i32 @cmp_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) {
-; CHECK-LABEL: @cmp_v2f64(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <2 x double> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
-; CHECK-NEXT:    br i1 [[TMP1]], label [[T:%.*]], label [[F:%.*]]
-; CHECK:       t:
-; CHECK-NEXT:    [[TMP2:%.*]] = fcmp ogt <2 x double> [[Y]], [[Z:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
-; CHECK-NEXT:    [[E:%.*]] = select i1 [[TMP3]], i32 42, i32 99
-; CHECK-NEXT:    ret i32 [[E]]
-; CHECK:       f:
-; CHECK-NEXT:    ret i32 0
+; SSE-LABEL: @cmp_v2f64(
+; SSE-NEXT:  entry:
+; SSE-NEXT:    [[X1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
+; SSE-NEXT:    [[Y1:%.*]] = extractelement <2 x double> [[Y:%.*]], i32 1
+; SSE-NEXT:    [[CMP1:%.*]] = fcmp oeq double [[X1]], [[Y1]]
+; SSE-NEXT:    br i1 [[CMP1]], label [[T:%.*]], label [[F:%.*]]
+; SSE:       t:
+; SSE-NEXT:    [[Z1:%.*]] = extractelement <2 x double> [[Z:%.*]], i32 1
+; SSE-NEXT:    [[CMP2:%.*]] = fcmp ogt double [[Y1]], [[Z1]]
+; SSE-NEXT:    [[E:%.*]] = select i1 [[CMP2]], i32 42, i32 99
+; SSE-NEXT:    ret i32 [[E]]
+; SSE:       f:
+; SSE-NEXT:    ret i32 0
+;
+; AVX-LABEL: @cmp_v2f64(
+; AVX-NEXT:  entry:
+; AVX-NEXT:    [[TMP0:%.*]] = fcmp oeq <2 x double> [[X:%.*]], [[Y:%.*]]
+; AVX-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
+; AVX-NEXT:    br i1 [[TMP1]], label [[T:%.*]], label [[F:%.*]]
+; AVX:       t:
+; AVX-NEXT:    [[TMP2:%.*]] = fcmp ogt <2 x double> [[Y]], [[Z:%.*]]
+; AVX-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
+; AVX-NEXT:    [[E:%.*]] = select i1 [[TMP3]], i32 42, i32 99
+; AVX-NEXT:    ret i32 [[E]]
+; AVX:       f:
+; AVX-NEXT:    ret i32 0
 ;
 entry:
   %x1 = extractelement <2 x double> %x, i32 1