[llvm] [RFC][X86] Allow speculative BSR/BSF instructions on targets with CMOV (PR #102885)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 08:21:23 PDT 2024
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: C/C++ code formatter, clang-format found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff 254da5ab8bce846bcbac9862f31c1891d8feea44 cc0e78a69336f84812a1d6ea9f3f95a80e8b8abe --extensions cpp -- llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86TargetTransformInfo.cpp
``````````
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index cb9ee64a67..274a798620 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -4206,104 +4206,106 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::CTPOP, MVT::i16, { 1, 1, 2, 2 } }, // popcnt(zext())
{ ISD::CTPOP, MVT::i8, { 1, 1, 2, 2 } }, // popcnt(zext())
};
- static const CostKindTblEntry X64CostTbl[] = { // 64-bit targets
- { ISD::ABS, MVT::i64, { 1, 2, 3, 3 } }, // SUB+CMOV
- { ISD::BITREVERSE, MVT::i64, { 10, 12, 20, 22 } },
- { ISD::BSWAP, MVT::i64, { 1, 2, 1, 2 } },
- { ISD::CTLZ, MVT::i64, { 2, 2, 4, 5 } }, // BSR+XOR or BSR+XOR+CMOV
- { ISD::CTLZ_ZERO_UNDEF, MVT::i64,{ 1, 2, 2, 2 } }, // BSR+XOR
- { ISD::CTTZ, MVT::i64, { 2, 2, 3, 4 } }, // TEST+BSF+CMOV/BRANCH
- { ISD::CTTZ_ZERO_UNDEF, MVT::i64,{ 1, 2, 1, 2 } }, // BSF
- { ISD::CTPOP, MVT::i64, { 10, 6, 19, 19 } },
- { ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
- { ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
- { X86ISD::VROTLI, MVT::i64, { 1, 1, 1, 1 } },
- { ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
- { ISD::SADDSAT, MVT::i64, { 4, 4, 7, 10 } },
- { ISD::SSUBSAT, MVT::i64, { 4, 5, 8, 11 } },
- { ISD::UADDSAT, MVT::i64, { 2, 3, 4, 7 } },
- { ISD::USUBSAT, MVT::i64, { 2, 3, 4, 7 } },
- { ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
- { ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
- { ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
- { ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
- { ISD::SADDO, MVT::i64, { 2, 2, 4, 6 } },
- { ISD::UADDO, MVT::i64, { 2, 2, 4, 6 } },
- { ISD::SMULO, MVT::i64, { 4, 4, 4, 6 } },
- { ISD::UMULO, MVT::i64, { 8, 8, 4, 7 } },
+ static const CostKindTblEntry X64CostTbl[] = {
+ // 64-bit targets
+ {ISD::ABS, MVT::i64, {1, 2, 3, 3}}, // SUB+CMOV
+ {ISD::BITREVERSE, MVT::i64, {10, 12, 20, 22}},
+ {ISD::BSWAP, MVT::i64, {1, 2, 1, 2}},
+ {ISD::CTLZ, MVT::i64, {2, 2, 4, 5}}, // BSR+XOR or BSR+XOR+CMOV
+ {ISD::CTLZ_ZERO_UNDEF, MVT::i64, {1, 2, 2, 2}}, // BSR+XOR
+ {ISD::CTTZ, MVT::i64, {2, 2, 3, 4}}, // TEST+BSF+CMOV/BRANCH
+ {ISD::CTTZ_ZERO_UNDEF, MVT::i64, {1, 2, 1, 2}}, // BSF
+ {ISD::CTPOP, MVT::i64, {10, 6, 19, 19}},
+ {ISD::ROTL, MVT::i64, {2, 3, 1, 3}},
+ {ISD::ROTR, MVT::i64, {2, 3, 1, 3}},
+ {X86ISD::VROTLI, MVT::i64, {1, 1, 1, 1}},
+ {ISD::FSHL, MVT::i64, {4, 4, 1, 4}},
+ {ISD::SADDSAT, MVT::i64, {4, 4, 7, 10}},
+ {ISD::SSUBSAT, MVT::i64, {4, 5, 8, 11}},
+ {ISD::UADDSAT, MVT::i64, {2, 3, 4, 7}},
+ {ISD::USUBSAT, MVT::i64, {2, 3, 4, 7}},
+ {ISD::SMAX, MVT::i64, {1, 3, 2, 3}},
+ {ISD::SMIN, MVT::i64, {1, 3, 2, 3}},
+ {ISD::UMAX, MVT::i64, {1, 3, 2, 3}},
+ {ISD::UMIN, MVT::i64, {1, 3, 2, 3}},
+ {ISD::SADDO, MVT::i64, {2, 2, 4, 6}},
+ {ISD::UADDO, MVT::i64, {2, 2, 4, 6}},
+ {ISD::SMULO, MVT::i64, {4, 4, 4, 6}},
+ {ISD::UMULO, MVT::i64, {8, 8, 4, 7}},
};
- static const CostKindTblEntry X86CostTbl[] = { // 32 or 64-bit targets
- { ISD::ABS, MVT::i32, { 1, 2, 3, 3 } }, // SUB+XOR+SRA or SUB+CMOV
- { ISD::ABS, MVT::i16, { 2, 2, 3, 3 } }, // SUB+XOR+SRA or SUB+CMOV
- { ISD::ABS, MVT::i8, { 2, 4, 4, 3 } }, // SUB+XOR+SRA
- { ISD::BITREVERSE, MVT::i32, { 9, 12, 17, 19 } },
- { ISD::BITREVERSE, MVT::i16, { 9, 12, 17, 19 } },
- { ISD::BITREVERSE, MVT::i8, { 7, 9, 13, 14 } },
- { ISD::BSWAP, MVT::i32, { 1, 1, 1, 1 } },
- { ISD::BSWAP, MVT::i16, { 1, 2, 1, 2 } }, // ROL
- { ISD::CTLZ, MVT::i32, { 2, 2, 4, 5 } }, // BSR+XOR or BSR+XOR+CMOV
- { ISD::CTLZ, MVT::i16, { 2, 2, 4, 5 } }, // BSR+XOR or BSR+XOR+CMOV
- { ISD::CTLZ, MVT::i8, { 2, 2, 5, 6 } }, // BSR+XOR or BSR+XOR+CMOV
- { ISD::CTLZ_ZERO_UNDEF, MVT::i32,{ 1, 2, 2, 2 } }, // BSR+XOR
- { ISD::CTLZ_ZERO_UNDEF, MVT::i16,{ 2, 2, 2, 2 } }, // BSR+XOR
- { ISD::CTLZ_ZERO_UNDEF, MVT::i8, { 2, 2, 3, 3 } }, // BSR+XOR
- { ISD::CTTZ, MVT::i32, { 2, 2, 3, 3 } }, // TEST+BSF+CMOV/BRANCH
- { ISD::CTTZ, MVT::i16, { 2, 2, 2, 3 } }, // TEST+BSF+CMOV/BRANCH
- { ISD::CTTZ, MVT::i8, { 2, 2, 2, 3 } }, // TEST+BSF+CMOV/BRANCH
- { ISD::CTTZ_ZERO_UNDEF, MVT::i32,{ 1, 2, 1, 2 } }, // BSF
- { ISD::CTTZ_ZERO_UNDEF, MVT::i16,{ 2, 2, 1, 2 } }, // BSF
- { ISD::CTTZ_ZERO_UNDEF, MVT::i8, { 2, 2, 1, 2 } }, // BSF
- { ISD::CTPOP, MVT::i32, { 8, 7, 15, 15 } },
- { ISD::CTPOP, MVT::i16, { 9, 8, 17, 17 } },
- { ISD::CTPOP, MVT::i8, { 7, 6, 6, 6 } },
- { ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
- { ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
- { ISD::ROTL, MVT::i8, { 2, 3, 1, 3 } },
- { ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
- { ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
- { ISD::ROTR, MVT::i8, { 2, 3, 1, 3 } },
- { X86ISD::VROTLI, MVT::i32, { 1, 1, 1, 1 } },
- { X86ISD::VROTLI, MVT::i16, { 1, 1, 1, 1 } },
- { X86ISD::VROTLI, MVT::i8, { 1, 1, 1, 1 } },
- { ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
- { ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
- { ISD::FSHL, MVT::i8, { 4, 4, 2, 5 } },
- { ISD::SADDSAT, MVT::i32, { 3, 4, 6, 9 } },
- { ISD::SADDSAT, MVT::i16, { 4, 4, 7, 10 } },
- { ISD::SADDSAT, MVT::i8, { 4, 5, 8, 11 } },
- { ISD::SSUBSAT, MVT::i32, { 4, 4, 7, 10 } },
- { ISD::SSUBSAT, MVT::i16, { 4, 4, 7, 10 } },
- { ISD::SSUBSAT, MVT::i8, { 4, 5, 8, 11 } },
- { ISD::UADDSAT, MVT::i32, { 2, 3, 4, 7 } },
- { ISD::UADDSAT, MVT::i16, { 2, 3, 4, 7 } },
- { ISD::UADDSAT, MVT::i8, { 3, 3, 5, 8 } },
- { ISD::USUBSAT, MVT::i32, { 2, 3, 4, 7 } },
- { ISD::USUBSAT, MVT::i16, { 2, 3, 4, 7 } },
- { ISD::USUBSAT, MVT::i8, { 3, 3, 5, 8 } },
- { ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
- { ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
- { ISD::SMAX, MVT::i8, { 1, 4, 2, 4 } },
- { ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
- { ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
- { ISD::SMIN, MVT::i8, { 1, 4, 2, 4 } },
- { ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
- { ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
- { ISD::UMAX, MVT::i8, { 1, 4, 2, 4 } },
- { ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
- { ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
- { ISD::UMIN, MVT::i8, { 1, 4, 2, 4 } },
- { ISD::SADDO, MVT::i32, { 2, 2, 4, 6 } },
- { ISD::SADDO, MVT::i16, { 2, 2, 4, 6 } },
- { ISD::SADDO, MVT::i8, { 2, 2, 4, 6 } },
- { ISD::UADDO, MVT::i32, { 2, 2, 4, 6 } },
- { ISD::UADDO, MVT::i16, { 2, 2, 4, 6 } },
- { ISD::UADDO, MVT::i8, { 2, 2, 4, 6 } },
- { ISD::SMULO, MVT::i32, { 2, 2, 4, 6 } },
- { ISD::SMULO, MVT::i16, { 5, 5, 4, 6 } },
- { ISD::SMULO, MVT::i8, { 6, 6, 4, 6 } },
- { ISD::UMULO, MVT::i32, { 6, 6, 4, 8 } },
- { ISD::UMULO, MVT::i16, { 6, 6, 4, 9 } },
- { ISD::UMULO, MVT::i8, { 6, 6, 4, 6 } },
+ static const CostKindTblEntry X86CostTbl[] = {
+ // 32 or 64-bit targets
+ {ISD::ABS, MVT::i32, {1, 2, 3, 3}}, // SUB+XOR+SRA or SUB+CMOV
+ {ISD::ABS, MVT::i16, {2, 2, 3, 3}}, // SUB+XOR+SRA or SUB+CMOV
+ {ISD::ABS, MVT::i8, {2, 4, 4, 3}}, // SUB+XOR+SRA
+ {ISD::BITREVERSE, MVT::i32, {9, 12, 17, 19}},
+ {ISD::BITREVERSE, MVT::i16, {9, 12, 17, 19}},
+ {ISD::BITREVERSE, MVT::i8, {7, 9, 13, 14}},
+ {ISD::BSWAP, MVT::i32, {1, 1, 1, 1}},
+ {ISD::BSWAP, MVT::i16, {1, 2, 1, 2}}, // ROL
+ {ISD::CTLZ, MVT::i32, {2, 2, 4, 5}}, // BSR+XOR or BSR+XOR+CMOV
+ {ISD::CTLZ, MVT::i16, {2, 2, 4, 5}}, // BSR+XOR or BSR+XOR+CMOV
+ {ISD::CTLZ, MVT::i8, {2, 2, 5, 6}}, // BSR+XOR or BSR+XOR+CMOV
+ {ISD::CTLZ_ZERO_UNDEF, MVT::i32, {1, 2, 2, 2}}, // BSR+XOR
+ {ISD::CTLZ_ZERO_UNDEF, MVT::i16, {2, 2, 2, 2}}, // BSR+XOR
+ {ISD::CTLZ_ZERO_UNDEF, MVT::i8, {2, 2, 3, 3}}, // BSR+XOR
+ {ISD::CTTZ, MVT::i32, {2, 2, 3, 3}}, // TEST+BSF+CMOV/BRANCH
+ {ISD::CTTZ, MVT::i16, {2, 2, 2, 3}}, // TEST+BSF+CMOV/BRANCH
+ {ISD::CTTZ, MVT::i8, {2, 2, 2, 3}}, // TEST+BSF+CMOV/BRANCH
+ {ISD::CTTZ_ZERO_UNDEF, MVT::i32, {1, 2, 1, 2}}, // BSF
+ {ISD::CTTZ_ZERO_UNDEF, MVT::i16, {2, 2, 1, 2}}, // BSF
+ {ISD::CTTZ_ZERO_UNDEF, MVT::i8, {2, 2, 1, 2}}, // BSF
+ {ISD::CTPOP, MVT::i32, {8, 7, 15, 15}},
+ {ISD::CTPOP, MVT::i16, {9, 8, 17, 17}},
+ {ISD::CTPOP, MVT::i8, {7, 6, 6, 6}},
+ {ISD::ROTL, MVT::i32, {2, 3, 1, 3}},
+ {ISD::ROTL, MVT::i16, {2, 3, 1, 3}},
+ {ISD::ROTL, MVT::i8, {2, 3, 1, 3}},
+ {ISD::ROTR, MVT::i32, {2, 3, 1, 3}},
+ {ISD::ROTR, MVT::i16, {2, 3, 1, 3}},
+ {ISD::ROTR, MVT::i8, {2, 3, 1, 3}},
+ {X86ISD::VROTLI, MVT::i32, {1, 1, 1, 1}},
+ {X86ISD::VROTLI, MVT::i16, {1, 1, 1, 1}},
+ {X86ISD::VROTLI, MVT::i8, {1, 1, 1, 1}},
+ {ISD::FSHL, MVT::i32, {4, 4, 1, 4}},
+ {ISD::FSHL, MVT::i16, {4, 4, 2, 5}},
+ {ISD::FSHL, MVT::i8, {4, 4, 2, 5}},
+ {ISD::SADDSAT, MVT::i32, {3, 4, 6, 9}},
+ {ISD::SADDSAT, MVT::i16, {4, 4, 7, 10}},
+ {ISD::SADDSAT, MVT::i8, {4, 5, 8, 11}},
+ {ISD::SSUBSAT, MVT::i32, {4, 4, 7, 10}},
+ {ISD::SSUBSAT, MVT::i16, {4, 4, 7, 10}},
+ {ISD::SSUBSAT, MVT::i8, {4, 5, 8, 11}},
+ {ISD::UADDSAT, MVT::i32, {2, 3, 4, 7}},
+ {ISD::UADDSAT, MVT::i16, {2, 3, 4, 7}},
+ {ISD::UADDSAT, MVT::i8, {3, 3, 5, 8}},
+ {ISD::USUBSAT, MVT::i32, {2, 3, 4, 7}},
+ {ISD::USUBSAT, MVT::i16, {2, 3, 4, 7}},
+ {ISD::USUBSAT, MVT::i8, {3, 3, 5, 8}},
+ {ISD::SMAX, MVT::i32, {1, 2, 2, 3}},
+ {ISD::SMAX, MVT::i16, {1, 4, 2, 4}},
+ {ISD::SMAX, MVT::i8, {1, 4, 2, 4}},
+ {ISD::SMIN, MVT::i32, {1, 2, 2, 3}},
+ {ISD::SMIN, MVT::i16, {1, 4, 2, 4}},
+ {ISD::SMIN, MVT::i8, {1, 4, 2, 4}},
+ {ISD::UMAX, MVT::i32, {1, 2, 2, 3}},
+ {ISD::UMAX, MVT::i16, {1, 4, 2, 4}},
+ {ISD::UMAX, MVT::i8, {1, 4, 2, 4}},
+ {ISD::UMIN, MVT::i32, {1, 2, 2, 3}},
+ {ISD::UMIN, MVT::i16, {1, 4, 2, 4}},
+ {ISD::UMIN, MVT::i8, {1, 4, 2, 4}},
+ {ISD::SADDO, MVT::i32, {2, 2, 4, 6}},
+ {ISD::SADDO, MVT::i16, {2, 2, 4, 6}},
+ {ISD::SADDO, MVT::i8, {2, 2, 4, 6}},
+ {ISD::UADDO, MVT::i32, {2, 2, 4, 6}},
+ {ISD::UADDO, MVT::i16, {2, 2, 4, 6}},
+ {ISD::UADDO, MVT::i8, {2, 2, 4, 6}},
+ {ISD::SMULO, MVT::i32, {2, 2, 4, 6}},
+ {ISD::SMULO, MVT::i16, {5, 5, 4, 6}},
+ {ISD::SMULO, MVT::i8, {6, 6, 4, 6}},
+ {ISD::UMULO, MVT::i32, {6, 6, 4, 8}},
+ {ISD::UMULO, MVT::i16, {6, 6, 4, 9}},
+ {ISD::UMULO, MVT::i8, {6, 6, 4, 6}},
};
Type *RetTy = ICA.getReturnType();
``````````
</details>
https://github.com/llvm/llvm-project/pull/102885
More information about the llvm-commits mailing list