[llvm] 8bf624a - [RISCV] Key VectorIntrinsicCostTable by SEW [nfc-ish]
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 18 17:11:03 PST 2024
Author: Philip Reames
Date: 2024-01-18T17:10:56-08:00
New Revision: 8bf624af4776cdaad82c0fc4da9e9b0f14e9ea10
URL: https://github.com/llvm/llvm-project/commit/8bf624af4776cdaad82c0fc4da9e9b0f14e9ea10
DIFF: https://github.com/llvm/llvm-project/commit/8bf624af4776cdaad82c0fc4da9e9b0f14e9ea10.diff
LOG: [RISCV] Key VectorIntrinsicCostTable by SEW [nfc-ish]
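Previously, we keyed the table by the vector type, but we were actually assigning the same cost to all types with a common element type. Unless we'd missed an entry, this means that we were effectively performing a lookup by SEW (selected element width).
Restructure the table to make this SEW dependence more explicit, and in the process greatly reduce the size of the table. This is only "nfc-ish" because any vector type the old table had missed (e.g. nxv32i8 for bitreverse) now picks up the cost listed for its element type instead of falling through to the base implementation.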
Added:
Modified:
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 056b947c7a9f90..4ea3a519308995 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -656,494 +656,61 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
// instruction counts with the following adjustments made:
// * One vsetvli is considered free.
static const CostTblEntry VectorIntrinsicCostTable[]{
- {Intrinsic::floor, MVT::v2f32, 9},
- {Intrinsic::floor, MVT::v4f32, 9},
- {Intrinsic::floor, MVT::v8f32, 9},
- {Intrinsic::floor, MVT::v16f32, 9},
- {Intrinsic::floor, MVT::nxv1f32, 9},
- {Intrinsic::floor, MVT::nxv2f32, 9},
- {Intrinsic::floor, MVT::nxv4f32, 9},
- {Intrinsic::floor, MVT::nxv8f32, 9},
- {Intrinsic::floor, MVT::nxv16f32, 9},
- {Intrinsic::floor, MVT::v2f64, 9},
- {Intrinsic::floor, MVT::v4f64, 9},
- {Intrinsic::floor, MVT::v8f64, 9},
- {Intrinsic::floor, MVT::v16f64, 9},
- {Intrinsic::floor, MVT::nxv1f64, 9},
- {Intrinsic::floor, MVT::nxv2f64, 9},
- {Intrinsic::floor, MVT::nxv4f64, 9},
- {Intrinsic::floor, MVT::nxv8f64, 9},
- {Intrinsic::ceil, MVT::v2f32, 9},
- {Intrinsic::ceil, MVT::v4f32, 9},
- {Intrinsic::ceil, MVT::v8f32, 9},
- {Intrinsic::ceil, MVT::v16f32, 9},
- {Intrinsic::ceil, MVT::nxv1f32, 9},
- {Intrinsic::ceil, MVT::nxv2f32, 9},
- {Intrinsic::ceil, MVT::nxv4f32, 9},
- {Intrinsic::ceil, MVT::nxv8f32, 9},
- {Intrinsic::ceil, MVT::nxv16f32, 9},
- {Intrinsic::ceil, MVT::v2f64, 9},
- {Intrinsic::ceil, MVT::v4f64, 9},
- {Intrinsic::ceil, MVT::v8f64, 9},
- {Intrinsic::ceil, MVT::v16f64, 9},
- {Intrinsic::ceil, MVT::nxv1f64, 9},
- {Intrinsic::ceil, MVT::nxv2f64, 9},
- {Intrinsic::ceil, MVT::nxv4f64, 9},
- {Intrinsic::ceil, MVT::nxv8f64, 9},
- {Intrinsic::trunc, MVT::v2f32, 7},
- {Intrinsic::trunc, MVT::v4f32, 7},
- {Intrinsic::trunc, MVT::v8f32, 7},
- {Intrinsic::trunc, MVT::v16f32, 7},
- {Intrinsic::trunc, MVT::nxv1f32, 7},
- {Intrinsic::trunc, MVT::nxv2f32, 7},
- {Intrinsic::trunc, MVT::nxv4f32, 7},
- {Intrinsic::trunc, MVT::nxv8f32, 7},
- {Intrinsic::trunc, MVT::nxv16f32, 7},
- {Intrinsic::trunc, MVT::v2f64, 7},
- {Intrinsic::trunc, MVT::v4f64, 7},
- {Intrinsic::trunc, MVT::v8f64, 7},
- {Intrinsic::trunc, MVT::v16f64, 7},
- {Intrinsic::trunc, MVT::nxv1f64, 7},
- {Intrinsic::trunc, MVT::nxv2f64, 7},
- {Intrinsic::trunc, MVT::nxv4f64, 7},
- {Intrinsic::trunc, MVT::nxv8f64, 7},
- {Intrinsic::round, MVT::v2f32, 9},
- {Intrinsic::round, MVT::v4f32, 9},
- {Intrinsic::round, MVT::v8f32, 9},
- {Intrinsic::round, MVT::v16f32, 9},
- {Intrinsic::round, MVT::nxv1f32, 9},
- {Intrinsic::round, MVT::nxv2f32, 9},
- {Intrinsic::round, MVT::nxv4f32, 9},
- {Intrinsic::round, MVT::nxv8f32, 9},
- {Intrinsic::round, MVT::nxv16f32, 9},
- {Intrinsic::round, MVT::v2f64, 9},
- {Intrinsic::round, MVT::v4f64, 9},
- {Intrinsic::round, MVT::v8f64, 9},
- {Intrinsic::round, MVT::v16f64, 9},
- {Intrinsic::round, MVT::nxv1f64, 9},
- {Intrinsic::round, MVT::nxv2f64, 9},
- {Intrinsic::round, MVT::nxv4f64, 9},
- {Intrinsic::round, MVT::nxv8f64, 9},
- {Intrinsic::roundeven, MVT::v2f32, 9},
- {Intrinsic::roundeven, MVT::v4f32, 9},
- {Intrinsic::roundeven, MVT::v8f32, 9},
- {Intrinsic::roundeven, MVT::v16f32, 9},
- {Intrinsic::roundeven, MVT::nxv1f32, 9},
- {Intrinsic::roundeven, MVT::nxv2f32, 9},
- {Intrinsic::roundeven, MVT::nxv4f32, 9},
- {Intrinsic::roundeven, MVT::nxv8f32, 9},
- {Intrinsic::roundeven, MVT::nxv16f32, 9},
- {Intrinsic::roundeven, MVT::v2f64, 9},
- {Intrinsic::roundeven, MVT::v4f64, 9},
- {Intrinsic::roundeven, MVT::v8f64, 9},
- {Intrinsic::roundeven, MVT::v16f64, 9},
- {Intrinsic::roundeven, MVT::nxv1f64, 9},
- {Intrinsic::roundeven, MVT::nxv2f64, 9},
- {Intrinsic::roundeven, MVT::nxv4f64, 9},
- {Intrinsic::roundeven, MVT::nxv8f64, 9},
- {Intrinsic::rint, MVT::v2f32, 7},
- {Intrinsic::rint, MVT::v4f32, 7},
- {Intrinsic::rint, MVT::v8f32, 7},
- {Intrinsic::rint, MVT::v16f32, 7},
- {Intrinsic::rint, MVT::nxv1f32, 7},
- {Intrinsic::rint, MVT::nxv2f32, 7},
- {Intrinsic::rint, MVT::nxv4f32, 7},
- {Intrinsic::rint, MVT::nxv8f32, 7},
- {Intrinsic::rint, MVT::nxv16f32, 7},
- {Intrinsic::rint, MVT::v2f64, 7},
- {Intrinsic::rint, MVT::v4f64, 7},
- {Intrinsic::rint, MVT::v8f64, 7},
- {Intrinsic::rint, MVT::v16f64, 7},
- {Intrinsic::rint, MVT::nxv1f64, 7},
- {Intrinsic::rint, MVT::nxv2f64, 7},
- {Intrinsic::rint, MVT::nxv4f64, 7},
- {Intrinsic::rint, MVT::nxv8f64, 7},
- {Intrinsic::lrint, MVT::v2i32, 1},
- {Intrinsic::lrint, MVT::v4i32, 1},
- {Intrinsic::lrint, MVT::v8i32, 1},
- {Intrinsic::lrint, MVT::v16i32, 1},
- {Intrinsic::lrint, MVT::nxv1i32, 1},
- {Intrinsic::lrint, MVT::nxv2i32, 1},
- {Intrinsic::lrint, MVT::nxv4i32, 1},
- {Intrinsic::lrint, MVT::nxv8i32, 1},
- {Intrinsic::lrint, MVT::nxv16i32, 1},
- {Intrinsic::lrint, MVT::v2i64, 1},
- {Intrinsic::lrint, MVT::v4i64, 1},
- {Intrinsic::lrint, MVT::v8i64, 1},
- {Intrinsic::lrint, MVT::v16i64, 1},
- {Intrinsic::lrint, MVT::nxv1i64, 1},
- {Intrinsic::lrint, MVT::nxv2i64, 1},
- {Intrinsic::lrint, MVT::nxv4i64, 1},
- {Intrinsic::lrint, MVT::nxv8i64, 1},
- {Intrinsic::llrint, MVT::v2i64, 1},
- {Intrinsic::llrint, MVT::v4i64, 1},
- {Intrinsic::llrint, MVT::v8i64, 1},
- {Intrinsic::llrint, MVT::v16i64, 1},
- {Intrinsic::llrint, MVT::nxv1i64, 1},
- {Intrinsic::llrint, MVT::nxv2i64, 1},
- {Intrinsic::llrint, MVT::nxv4i64, 1},
- {Intrinsic::llrint, MVT::nxv8i64, 1},
- {Intrinsic::nearbyint, MVT::v2f32, 9},
- {Intrinsic::nearbyint, MVT::v4f32, 9},
- {Intrinsic::nearbyint, MVT::v8f32, 9},
- {Intrinsic::nearbyint, MVT::v16f32, 9},
- {Intrinsic::nearbyint, MVT::nxv1f32, 9},
- {Intrinsic::nearbyint, MVT::nxv2f32, 9},
- {Intrinsic::nearbyint, MVT::nxv4f32, 9},
- {Intrinsic::nearbyint, MVT::nxv8f32, 9},
- {Intrinsic::nearbyint, MVT::nxv16f32, 9},
- {Intrinsic::nearbyint, MVT::v2f64, 9},
- {Intrinsic::nearbyint, MVT::v4f64, 9},
- {Intrinsic::nearbyint, MVT::v8f64, 9},
- {Intrinsic::nearbyint, MVT::v16f64, 9},
- {Intrinsic::nearbyint, MVT::nxv1f64, 9},
- {Intrinsic::nearbyint, MVT::nxv2f64, 9},
- {Intrinsic::nearbyint, MVT::nxv4f64, 9},
- {Intrinsic::nearbyint, MVT::nxv8f64, 9},
- {Intrinsic::bswap, MVT::v2i16, 3},
- {Intrinsic::bswap, MVT::v4i16, 3},
- {Intrinsic::bswap, MVT::v8i16, 3},
- {Intrinsic::bswap, MVT::v16i16, 3},
- {Intrinsic::bswap, MVT::nxv1i16, 3},
- {Intrinsic::bswap, MVT::nxv2i16, 3},
- {Intrinsic::bswap, MVT::nxv4i16, 3},
- {Intrinsic::bswap, MVT::nxv8i16, 3},
- {Intrinsic::bswap, MVT::nxv16i16, 3},
- {Intrinsic::bswap, MVT::v2i32, 12},
- {Intrinsic::bswap, MVT::v4i32, 12},
- {Intrinsic::bswap, MVT::v8i32, 12},
- {Intrinsic::bswap, MVT::v16i32, 12},
- {Intrinsic::bswap, MVT::nxv1i32, 12},
- {Intrinsic::bswap, MVT::nxv2i32, 12},
- {Intrinsic::bswap, MVT::nxv4i32, 12},
- {Intrinsic::bswap, MVT::nxv8i32, 12},
- {Intrinsic::bswap, MVT::nxv16i32, 12},
- {Intrinsic::bswap, MVT::v2i64, 31},
- {Intrinsic::bswap, MVT::v4i64, 31},
- {Intrinsic::bswap, MVT::v8i64, 31},
- {Intrinsic::bswap, MVT::v16i64, 31},
- {Intrinsic::bswap, MVT::nxv1i64, 31},
- {Intrinsic::bswap, MVT::nxv2i64, 31},
- {Intrinsic::bswap, MVT::nxv4i64, 31},
- {Intrinsic::bswap, MVT::nxv8i64, 31},
- {Intrinsic::vp_bswap, MVT::v2i16, 3},
- {Intrinsic::vp_bswap, MVT::v4i16, 3},
- {Intrinsic::vp_bswap, MVT::v8i16, 3},
- {Intrinsic::vp_bswap, MVT::v16i16, 3},
- {Intrinsic::vp_bswap, MVT::nxv1i16, 3},
- {Intrinsic::vp_bswap, MVT::nxv2i16, 3},
- {Intrinsic::vp_bswap, MVT::nxv4i16, 3},
- {Intrinsic::vp_bswap, MVT::nxv8i16, 3},
- {Intrinsic::vp_bswap, MVT::nxv16i16, 3},
- {Intrinsic::vp_bswap, MVT::v2i32, 12},
- {Intrinsic::vp_bswap, MVT::v4i32, 12},
- {Intrinsic::vp_bswap, MVT::v8i32, 12},
- {Intrinsic::vp_bswap, MVT::v16i32, 12},
- {Intrinsic::vp_bswap, MVT::nxv1i32, 12},
- {Intrinsic::vp_bswap, MVT::nxv2i32, 12},
- {Intrinsic::vp_bswap, MVT::nxv4i32, 12},
- {Intrinsic::vp_bswap, MVT::nxv8i32, 12},
- {Intrinsic::vp_bswap, MVT::nxv16i32, 12},
- {Intrinsic::vp_bswap, MVT::v2i64, 31},
- {Intrinsic::vp_bswap, MVT::v4i64, 31},
- {Intrinsic::vp_bswap, MVT::v8i64, 31},
- {Intrinsic::vp_bswap, MVT::v16i64, 31},
- {Intrinsic::vp_bswap, MVT::nxv1i64, 31},
- {Intrinsic::vp_bswap, MVT::nxv2i64, 31},
- {Intrinsic::vp_bswap, MVT::nxv4i64, 31},
- {Intrinsic::vp_bswap, MVT::nxv8i64, 31},
- {Intrinsic::vp_fshl, MVT::v2i8, 7},
- {Intrinsic::vp_fshl, MVT::v4i8, 7},
- {Intrinsic::vp_fshl, MVT::v8i8, 7},
- {Intrinsic::vp_fshl, MVT::v16i8, 7},
- {Intrinsic::vp_fshl, MVT::nxv1i8, 7},
- {Intrinsic::vp_fshl, MVT::nxv2i8, 7},
- {Intrinsic::vp_fshl, MVT::nxv4i8, 7},
- {Intrinsic::vp_fshl, MVT::nxv8i8, 7},
- {Intrinsic::vp_fshl, MVT::nxv16i8, 7},
- {Intrinsic::vp_fshl, MVT::nxv32i8, 7},
- {Intrinsic::vp_fshl, MVT::nxv64i8, 7},
- {Intrinsic::vp_fshl, MVT::v2i16, 7},
- {Intrinsic::vp_fshl, MVT::v4i16, 7},
- {Intrinsic::vp_fshl, MVT::v8i16, 7},
- {Intrinsic::vp_fshl, MVT::v16i16, 7},
- {Intrinsic::vp_fshl, MVT::nxv1i16, 7},
- {Intrinsic::vp_fshl, MVT::nxv2i16, 7},
- {Intrinsic::vp_fshl, MVT::nxv4i16, 7},
- {Intrinsic::vp_fshl, MVT::nxv8i16, 7},
- {Intrinsic::vp_fshl, MVT::nxv16i16, 7},
- {Intrinsic::vp_fshl, MVT::nxv32i16, 7},
- {Intrinsic::vp_fshl, MVT::v2i32, 7},
- {Intrinsic::vp_fshl, MVT::v4i32, 7},
- {Intrinsic::vp_fshl, MVT::v8i32, 7},
- {Intrinsic::vp_fshl, MVT::v16i32, 7},
- {Intrinsic::vp_fshl, MVT::nxv1i32, 7},
- {Intrinsic::vp_fshl, MVT::nxv2i32, 7},
- {Intrinsic::vp_fshl, MVT::nxv4i32, 7},
- {Intrinsic::vp_fshl, MVT::nxv8i32, 7},
- {Intrinsic::vp_fshl, MVT::nxv16i32, 7},
- {Intrinsic::vp_fshl, MVT::v2i64, 7},
- {Intrinsic::vp_fshl, MVT::v4i64, 7},
- {Intrinsic::vp_fshl, MVT::v8i64, 7},
- {Intrinsic::vp_fshl, MVT::v16i64, 7},
- {Intrinsic::vp_fshl, MVT::nxv1i64, 7},
- {Intrinsic::vp_fshl, MVT::nxv2i64, 7},
- {Intrinsic::vp_fshl, MVT::nxv4i64, 7},
- {Intrinsic::vp_fshl, MVT::nxv8i64, 7},
- {Intrinsic::vp_fshr, MVT::v2i8, 7},
- {Intrinsic::vp_fshr, MVT::v4i8, 7},
- {Intrinsic::vp_fshr, MVT::v8i8, 7},
- {Intrinsic::vp_fshr, MVT::v16i8, 7},
- {Intrinsic::vp_fshr, MVT::nxv1i8, 7},
- {Intrinsic::vp_fshr, MVT::nxv2i8, 7},
- {Intrinsic::vp_fshr, MVT::nxv4i8, 7},
- {Intrinsic::vp_fshr, MVT::nxv8i8, 7},
- {Intrinsic::vp_fshr, MVT::nxv16i8, 7},
- {Intrinsic::vp_fshr, MVT::nxv32i8, 7},
- {Intrinsic::vp_fshr, MVT::nxv64i8, 7},
- {Intrinsic::vp_fshr, MVT::v2i16, 7},
- {Intrinsic::vp_fshr, MVT::v4i16, 7},
- {Intrinsic::vp_fshr, MVT::v8i16, 7},
- {Intrinsic::vp_fshr, MVT::v16i16, 7},
- {Intrinsic::vp_fshr, MVT::nxv1i16, 7},
- {Intrinsic::vp_fshr, MVT::nxv2i16, 7},
- {Intrinsic::vp_fshr, MVT::nxv4i16, 7},
- {Intrinsic::vp_fshr, MVT::nxv8i16, 7},
- {Intrinsic::vp_fshr, MVT::nxv16i16, 7},
- {Intrinsic::vp_fshr, MVT::nxv32i16, 7},
- {Intrinsic::vp_fshr, MVT::v2i32, 7},
- {Intrinsic::vp_fshr, MVT::v4i32, 7},
- {Intrinsic::vp_fshr, MVT::v8i32, 7},
- {Intrinsic::vp_fshr, MVT::v16i32, 7},
- {Intrinsic::vp_fshr, MVT::nxv1i32, 7},
- {Intrinsic::vp_fshr, MVT::nxv2i32, 7},
- {Intrinsic::vp_fshr, MVT::nxv4i32, 7},
- {Intrinsic::vp_fshr, MVT::nxv8i32, 7},
- {Intrinsic::vp_fshr, MVT::nxv16i32, 7},
- {Intrinsic::vp_fshr, MVT::v2i64, 7},
- {Intrinsic::vp_fshr, MVT::v4i64, 7},
- {Intrinsic::vp_fshr, MVT::v8i64, 7},
- {Intrinsic::vp_fshr, MVT::v16i64, 7},
- {Intrinsic::vp_fshr, MVT::nxv1i64, 7},
- {Intrinsic::vp_fshr, MVT::nxv2i64, 7},
- {Intrinsic::vp_fshr, MVT::nxv4i64, 7},
- {Intrinsic::vp_fshr, MVT::nxv8i64, 7},
- {Intrinsic::bitreverse, MVT::v2i8, 17},
- {Intrinsic::bitreverse, MVT::v4i8, 17},
- {Intrinsic::bitreverse, MVT::v8i8, 17},
- {Intrinsic::bitreverse, MVT::v16i8, 17},
- {Intrinsic::bitreverse, MVT::nxv1i8, 17},
- {Intrinsic::bitreverse, MVT::nxv2i8, 17},
- {Intrinsic::bitreverse, MVT::nxv4i8, 17},
- {Intrinsic::bitreverse, MVT::nxv8i8, 17},
- {Intrinsic::bitreverse, MVT::nxv16i8, 17},
- {Intrinsic::bitreverse, MVT::v2i16, 24},
- {Intrinsic::bitreverse, MVT::v4i16, 24},
- {Intrinsic::bitreverse, MVT::v8i16, 24},
- {Intrinsic::bitreverse, MVT::v16i16, 24},
- {Intrinsic::bitreverse, MVT::nxv1i16, 24},
- {Intrinsic::bitreverse, MVT::nxv2i16, 24},
- {Intrinsic::bitreverse, MVT::nxv4i16, 24},
- {Intrinsic::bitreverse, MVT::nxv8i16, 24},
- {Intrinsic::bitreverse, MVT::nxv16i16, 24},
- {Intrinsic::bitreverse, MVT::v2i32, 33},
- {Intrinsic::bitreverse, MVT::v4i32, 33},
- {Intrinsic::bitreverse, MVT::v8i32, 33},
- {Intrinsic::bitreverse, MVT::v16i32, 33},
- {Intrinsic::bitreverse, MVT::nxv1i32, 33},
- {Intrinsic::bitreverse, MVT::nxv2i32, 33},
- {Intrinsic::bitreverse, MVT::nxv4i32, 33},
- {Intrinsic::bitreverse, MVT::nxv8i32, 33},
- {Intrinsic::bitreverse, MVT::nxv16i32, 33},
- {Intrinsic::bitreverse, MVT::v2i64, 52},
- {Intrinsic::bitreverse, MVT::v4i64, 52},
- {Intrinsic::bitreverse, MVT::v8i64, 52},
- {Intrinsic::bitreverse, MVT::v16i64, 52},
- {Intrinsic::bitreverse, MVT::nxv1i64, 52},
- {Intrinsic::bitreverse, MVT::nxv2i64, 52},
- {Intrinsic::bitreverse, MVT::nxv4i64, 52},
- {Intrinsic::bitreverse, MVT::nxv8i64, 52},
- {Intrinsic::vp_bitreverse, MVT::v2i8, 17},
- {Intrinsic::vp_bitreverse, MVT::v4i8, 17},
- {Intrinsic::vp_bitreverse, MVT::v8i8, 17},
- {Intrinsic::vp_bitreverse, MVT::v16i8, 17},
- {Intrinsic::vp_bitreverse, MVT::nxv1i8, 17},
- {Intrinsic::vp_bitreverse, MVT::nxv2i8, 17},
- {Intrinsic::vp_bitreverse, MVT::nxv4i8, 17},
- {Intrinsic::vp_bitreverse, MVT::nxv8i8, 17},
- {Intrinsic::vp_bitreverse, MVT::nxv16i8, 17},
- {Intrinsic::vp_bitreverse, MVT::v2i16, 24},
- {Intrinsic::vp_bitreverse, MVT::v4i16, 24},
- {Intrinsic::vp_bitreverse, MVT::v8i16, 24},
- {Intrinsic::vp_bitreverse, MVT::v16i16, 24},
- {Intrinsic::vp_bitreverse, MVT::nxv1i16, 24},
- {Intrinsic::vp_bitreverse, MVT::nxv2i16, 24},
- {Intrinsic::vp_bitreverse, MVT::nxv4i16, 24},
- {Intrinsic::vp_bitreverse, MVT::nxv8i16, 24},
- {Intrinsic::vp_bitreverse, MVT::nxv16i16, 24},
- {Intrinsic::vp_bitreverse, MVT::v2i32, 33},
- {Intrinsic::vp_bitreverse, MVT::v4i32, 33},
- {Intrinsic::vp_bitreverse, MVT::v8i32, 33},
- {Intrinsic::vp_bitreverse, MVT::v16i32, 33},
- {Intrinsic::vp_bitreverse, MVT::nxv1i32, 33},
- {Intrinsic::vp_bitreverse, MVT::nxv2i32, 33},
- {Intrinsic::vp_bitreverse, MVT::nxv4i32, 33},
- {Intrinsic::vp_bitreverse, MVT::nxv8i32, 33},
- {Intrinsic::vp_bitreverse, MVT::nxv16i32, 33},
- {Intrinsic::vp_bitreverse, MVT::v2i64, 52},
- {Intrinsic::vp_bitreverse, MVT::v4i64, 52},
- {Intrinsic::vp_bitreverse, MVT::v8i64, 52},
- {Intrinsic::vp_bitreverse, MVT::v16i64, 52},
- {Intrinsic::vp_bitreverse, MVT::nxv1i64, 52},
- {Intrinsic::vp_bitreverse, MVT::nxv2i64, 52},
- {Intrinsic::vp_bitreverse, MVT::nxv4i64, 52},
- {Intrinsic::vp_bitreverse, MVT::nxv8i64, 52},
- {Intrinsic::ctpop, MVT::v2i8, 12},
- {Intrinsic::ctpop, MVT::v4i8, 12},
- {Intrinsic::ctpop, MVT::v8i8, 12},
- {Intrinsic::ctpop, MVT::v16i8, 12},
- {Intrinsic::ctpop, MVT::nxv1i8, 12},
- {Intrinsic::ctpop, MVT::nxv2i8, 12},
- {Intrinsic::ctpop, MVT::nxv4i8, 12},
- {Intrinsic::ctpop, MVT::nxv8i8, 12},
- {Intrinsic::ctpop, MVT::nxv16i8, 12},
- {Intrinsic::ctpop, MVT::v2i16, 19},
- {Intrinsic::ctpop, MVT::v4i16, 19},
- {Intrinsic::ctpop, MVT::v8i16, 19},
- {Intrinsic::ctpop, MVT::v16i16, 19},
- {Intrinsic::ctpop, MVT::nxv1i16, 19},
- {Intrinsic::ctpop, MVT::nxv2i16, 19},
- {Intrinsic::ctpop, MVT::nxv4i16, 19},
- {Intrinsic::ctpop, MVT::nxv8i16, 19},
- {Intrinsic::ctpop, MVT::nxv16i16, 19},
- {Intrinsic::ctpop, MVT::v2i32, 20},
- {Intrinsic::ctpop, MVT::v4i32, 20},
- {Intrinsic::ctpop, MVT::v8i32, 20},
- {Intrinsic::ctpop, MVT::v16i32, 20},
- {Intrinsic::ctpop, MVT::nxv1i32, 20},
- {Intrinsic::ctpop, MVT::nxv2i32, 20},
- {Intrinsic::ctpop, MVT::nxv4i32, 20},
- {Intrinsic::ctpop, MVT::nxv8i32, 20},
- {Intrinsic::ctpop, MVT::nxv16i32, 20},
- {Intrinsic::ctpop, MVT::v2i64, 21},
- {Intrinsic::ctpop, MVT::v4i64, 21},
- {Intrinsic::ctpop, MVT::v8i64, 21},
- {Intrinsic::ctpop, MVT::v16i64, 21},
- {Intrinsic::ctpop, MVT::nxv1i64, 21},
- {Intrinsic::ctpop, MVT::nxv2i64, 21},
- {Intrinsic::ctpop, MVT::nxv4i64, 21},
- {Intrinsic::ctpop, MVT::nxv8i64, 21},
- {Intrinsic::vp_ctpop, MVT::v2i8, 12},
- {Intrinsic::vp_ctpop, MVT::v4i8, 12},
- {Intrinsic::vp_ctpop, MVT::v8i8, 12},
- {Intrinsic::vp_ctpop, MVT::v16i8, 12},
- {Intrinsic::vp_ctpop, MVT::nxv1i8, 12},
- {Intrinsic::vp_ctpop, MVT::nxv2i8, 12},
- {Intrinsic::vp_ctpop, MVT::nxv4i8, 12},
- {Intrinsic::vp_ctpop, MVT::nxv8i8, 12},
- {Intrinsic::vp_ctpop, MVT::nxv16i8, 12},
- {Intrinsic::vp_ctpop, MVT::v2i16, 19},
- {Intrinsic::vp_ctpop, MVT::v4i16, 19},
- {Intrinsic::vp_ctpop, MVT::v8i16, 19},
- {Intrinsic::vp_ctpop, MVT::v16i16, 19},
- {Intrinsic::vp_ctpop, MVT::nxv1i16, 19},
- {Intrinsic::vp_ctpop, MVT::nxv2i16, 19},
- {Intrinsic::vp_ctpop, MVT::nxv4i16, 19},
- {Intrinsic::vp_ctpop, MVT::nxv8i16, 19},
- {Intrinsic::vp_ctpop, MVT::nxv16i16, 19},
- {Intrinsic::vp_ctpop, MVT::v2i32, 20},
- {Intrinsic::vp_ctpop, MVT::v4i32, 20},
- {Intrinsic::vp_ctpop, MVT::v8i32, 20},
- {Intrinsic::vp_ctpop, MVT::v16i32, 20},
- {Intrinsic::vp_ctpop, MVT::nxv1i32, 20},
- {Intrinsic::vp_ctpop, MVT::nxv2i32, 20},
- {Intrinsic::vp_ctpop, MVT::nxv4i32, 20},
- {Intrinsic::vp_ctpop, MVT::nxv8i32, 20},
- {Intrinsic::vp_ctpop, MVT::nxv16i32, 20},
- {Intrinsic::vp_ctpop, MVT::v2i64, 21},
- {Intrinsic::vp_ctpop, MVT::v4i64, 21},
- {Intrinsic::vp_ctpop, MVT::v8i64, 21},
- {Intrinsic::vp_ctpop, MVT::v16i64, 21},
- {Intrinsic::vp_ctpop, MVT::nxv1i64, 21},
- {Intrinsic::vp_ctpop, MVT::nxv2i64, 21},
- {Intrinsic::vp_ctpop, MVT::nxv4i64, 21},
- {Intrinsic::vp_ctpop, MVT::nxv8i64, 21},
- {Intrinsic::vp_ctlz, MVT::v2i8, 19},
- {Intrinsic::vp_ctlz, MVT::v4i8, 19},
- {Intrinsic::vp_ctlz, MVT::v8i8, 19},
- {Intrinsic::vp_ctlz, MVT::v16i8, 19},
- {Intrinsic::vp_ctlz, MVT::nxv1i8, 19},
- {Intrinsic::vp_ctlz, MVT::nxv2i8, 19},
- {Intrinsic::vp_ctlz, MVT::nxv4i8, 19},
- {Intrinsic::vp_ctlz, MVT::nxv8i8, 19},
- {Intrinsic::vp_ctlz, MVT::nxv16i8, 19},
- {Intrinsic::vp_ctlz, MVT::nxv32i8, 19},
- {Intrinsic::vp_ctlz, MVT::nxv64i8, 19},
- {Intrinsic::vp_ctlz, MVT::v2i16, 28},
- {Intrinsic::vp_ctlz, MVT::v4i16, 28},
- {Intrinsic::vp_ctlz, MVT::v8i16, 28},
- {Intrinsic::vp_ctlz, MVT::v16i16, 28},
- {Intrinsic::vp_ctlz, MVT::nxv1i16, 28},
- {Intrinsic::vp_ctlz, MVT::nxv2i16, 28},
- {Intrinsic::vp_ctlz, MVT::nxv4i16, 28},
- {Intrinsic::vp_ctlz, MVT::nxv8i16, 28},
- {Intrinsic::vp_ctlz, MVT::nxv16i16, 28},
- {Intrinsic::vp_ctlz, MVT::nxv32i16, 28},
- {Intrinsic::vp_ctlz, MVT::v2i32, 31},
- {Intrinsic::vp_ctlz, MVT::v4i32, 31},
- {Intrinsic::vp_ctlz, MVT::v8i32, 31},
- {Intrinsic::vp_ctlz, MVT::v16i32, 31},
- {Intrinsic::vp_ctlz, MVT::nxv1i32, 31},
- {Intrinsic::vp_ctlz, MVT::nxv2i32, 31},
- {Intrinsic::vp_ctlz, MVT::nxv4i32, 31},
- {Intrinsic::vp_ctlz, MVT::nxv8i32, 31},
- {Intrinsic::vp_ctlz, MVT::nxv16i32, 31},
- {Intrinsic::vp_ctlz, MVT::v2i64, 35},
- {Intrinsic::vp_ctlz, MVT::v4i64, 35},
- {Intrinsic::vp_ctlz, MVT::v8i64, 35},
- {Intrinsic::vp_ctlz, MVT::v16i64, 35},
- {Intrinsic::vp_ctlz, MVT::nxv1i64, 35},
- {Intrinsic::vp_ctlz, MVT::nxv2i64, 35},
- {Intrinsic::vp_ctlz, MVT::nxv4i64, 35},
- {Intrinsic::vp_ctlz, MVT::nxv8i64, 35},
- {Intrinsic::vp_cttz, MVT::v2i8, 16},
- {Intrinsic::vp_cttz, MVT::v4i8, 16},
- {Intrinsic::vp_cttz, MVT::v8i8, 16},
- {Intrinsic::vp_cttz, MVT::v16i8, 16},
- {Intrinsic::vp_cttz, MVT::nxv1i8, 16},
- {Intrinsic::vp_cttz, MVT::nxv2i8, 16},
- {Intrinsic::vp_cttz, MVT::nxv4i8, 16},
- {Intrinsic::vp_cttz, MVT::nxv8i8, 16},
- {Intrinsic::vp_cttz, MVT::nxv16i8, 16},
- {Intrinsic::vp_cttz, MVT::nxv32i8, 16},
- {Intrinsic::vp_cttz, MVT::nxv64i8, 16},
- {Intrinsic::vp_cttz, MVT::v2i16, 23},
- {Intrinsic::vp_cttz, MVT::v4i16, 23},
- {Intrinsic::vp_cttz, MVT::v8i16, 23},
- {Intrinsic::vp_cttz, MVT::v16i16, 23},
- {Intrinsic::vp_cttz, MVT::nxv1i16, 23},
- {Intrinsic::vp_cttz, MVT::nxv2i16, 23},
- {Intrinsic::vp_cttz, MVT::nxv4i16, 23},
- {Intrinsic::vp_cttz, MVT::nxv8i16, 23},
- {Intrinsic::vp_cttz, MVT::nxv16i16, 23},
- {Intrinsic::vp_cttz, MVT::nxv32i16, 23},
- {Intrinsic::vp_cttz, MVT::v2i32, 24},
- {Intrinsic::vp_cttz, MVT::v4i32, 24},
- {Intrinsic::vp_cttz, MVT::v8i32, 24},
- {Intrinsic::vp_cttz, MVT::v16i32, 24},
- {Intrinsic::vp_cttz, MVT::nxv1i32, 24},
- {Intrinsic::vp_cttz, MVT::nxv2i32, 24},
- {Intrinsic::vp_cttz, MVT::nxv4i32, 24},
- {Intrinsic::vp_cttz, MVT::nxv8i32, 24},
- {Intrinsic::vp_cttz, MVT::nxv16i32, 24},
- {Intrinsic::vp_cttz, MVT::v2i64, 25},
- {Intrinsic::vp_cttz, MVT::v4i64, 25},
- {Intrinsic::vp_cttz, MVT::v8i64, 25},
- {Intrinsic::vp_cttz, MVT::v16i64, 25},
- {Intrinsic::vp_cttz, MVT::nxv1i64, 25},
- {Intrinsic::vp_cttz, MVT::nxv2i64, 25},
- {Intrinsic::vp_cttz, MVT::nxv4i64, 25},
- {Intrinsic::vp_cttz, MVT::nxv8i64, 25},
+ {Intrinsic::floor, MVT::f32, 9},
+ {Intrinsic::floor, MVT::f64, 9},
+ {Intrinsic::ceil, MVT::f32, 9},
+ {Intrinsic::ceil, MVT::f64, 9},
+ {Intrinsic::trunc, MVT::f32, 7},
+ {Intrinsic::trunc, MVT::f64, 7},
+ {Intrinsic::round, MVT::f32, 9},
+ {Intrinsic::round, MVT::f64, 9},
+ {Intrinsic::roundeven, MVT::f32, 9},
+ {Intrinsic::roundeven, MVT::f64, 9},
+ {Intrinsic::rint, MVT::f32, 7},
+ {Intrinsic::rint, MVT::f64, 7},
+ {Intrinsic::lrint, MVT::i32, 1},
+ {Intrinsic::lrint, MVT::i64, 1},
+ {Intrinsic::llrint, MVT::i64, 1},
+ {Intrinsic::nearbyint, MVT::f32, 9},
+ {Intrinsic::nearbyint, MVT::f64, 9},
+ {Intrinsic::bswap, MVT::i16, 3},
+ {Intrinsic::bswap, MVT::i32, 12},
+ {Intrinsic::bswap, MVT::i64, 31},
+ {Intrinsic::vp_bswap, MVT::i16, 3},
+ {Intrinsic::vp_bswap, MVT::i32, 12},
+ {Intrinsic::vp_bswap, MVT::i64, 31},
+ {Intrinsic::vp_fshl, MVT::i8, 7},
+ {Intrinsic::vp_fshl, MVT::i16, 7},
+ {Intrinsic::vp_fshl, MVT::i32, 7},
+ {Intrinsic::vp_fshl, MVT::i64, 7},
+ {Intrinsic::vp_fshr, MVT::i8, 7},
+ {Intrinsic::vp_fshr, MVT::i16, 7},
+ {Intrinsic::vp_fshr, MVT::i32, 7},
+ {Intrinsic::vp_fshr, MVT::i64, 7},
+ {Intrinsic::bitreverse, MVT::i8, 17},
+ {Intrinsic::bitreverse, MVT::i16, 24},
+ {Intrinsic::bitreverse, MVT::i32, 33},
+ {Intrinsic::bitreverse, MVT::i64, 52},
+ {Intrinsic::vp_bitreverse, MVT::i8, 17},
+ {Intrinsic::vp_bitreverse, MVT::i16, 24},
+ {Intrinsic::vp_bitreverse, MVT::i32, 33},
+ {Intrinsic::vp_bitreverse, MVT::i64, 52},
+ {Intrinsic::ctpop, MVT::i8, 12},
+ {Intrinsic::ctpop, MVT::i16, 19},
+ {Intrinsic::ctpop, MVT::i32, 20},
+ {Intrinsic::ctpop, MVT::i64, 21},
+ {Intrinsic::vp_ctpop, MVT::i8, 12},
+ {Intrinsic::vp_ctpop, MVT::i16, 19},
+ {Intrinsic::vp_ctpop, MVT::i32, 20},
+ {Intrinsic::vp_ctpop, MVT::i64, 21},
+ {Intrinsic::vp_ctlz, MVT::i8, 19},
+ {Intrinsic::vp_ctlz, MVT::i16, 28},
+ {Intrinsic::vp_ctlz, MVT::i32, 31},
+ {Intrinsic::vp_ctlz, MVT::i64, 35},
+ {Intrinsic::vp_cttz, MVT::i8, 16},
+ {Intrinsic::vp_cttz, MVT::i16, 23},
+ {Intrinsic::vp_cttz, MVT::i32, 24},
+ {Intrinsic::vp_cttz, MVT::i64, 25},
};
static unsigned getISDForVPIntrinsicID(Intrinsic::ID ID) {
@@ -1251,10 +818,13 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
}
if (ST->hasVInstructions() && RetTy->isVectorTy()) {
- auto LT = getTypeLegalizationCost(RetTy);
- if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable,
- ICA.getID(), LT.second))
- return LT.first * Entry->Cost;
+ if (auto LT = getTypeLegalizationCost(RetTy);
+ LT.second.isVector()) {
+ MVT EltTy = LT.second.getVectorElementType();
+ if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable,
+ ICA.getID(), EltTy))
+ return LT.first * Entry->Cost;
+ }
}
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
More information about the llvm-commits
mailing list